├── Chapter -1 -- Python for SAS Users Chapters.ipynb ├── Chapter 00 -- Motivation.ipynb ├── Chapter 01 -- Introduction.ipynb ├── Chapter 02 -- Data Structures.ipynb ├── Chapter 03 -- Data Types and Formatting.ipynb ├── Chapter 04 -- Pandas, Part 1.ipynb ├── Chapter 05 -- Understanding Indexes.ipynb ├── Chapter 06 -- Hierarchical Indexing.ipynb ├── Chapter 07 -- Pandas, Part 2.ipynb ├── Chapter 08 -- Date, Time, and Timedelta Objects.ipynb ├── Chapter 09 -- Panda Time Series and Date Handling.ipynb ├── Chapter 10 -- Groupby.ipynb ├── Chapter 11 -- Panda Readers.ipynb ├── Chapter 12 -- Additional Data Handling .ipynb ├── Data_Interchange_not_working_with_sas.df2sd.ipynb ├── README.md ├── data ├── Accidents_2015.csv ├── Duplicate_Timestamps.csv ├── February_2018.csv ├── February_2018.xlsx ├── HPI_master.csv ├── January_2018.csv ├── January_2018.xlsx ├── LC_Loan_Stats.csv ├── Left.csv ├── Loans_lc0.csv ├── Loans_lc1.csv ├── March_2018.csv ├── March_2018.xlsx ├── Right.csv ├── Road-Accident-Safety-Data-Guide.xls ├── Sales_Detail.csv ├── messy_input.csv ├── messy_input.xlsx ├── tickets.csv └── uk_accidents.csv └── saspy_Module.pdf /Chapter -1 -- Python for SAS Users Chapters.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Python for SAS Users" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "# Chapter organization\n", 15 | "\n", 16 | "These chapters are meant to be read in order as they start with foundational concepts used to build up more complex ideas. 
\n", 17 | "\n", 18 | "Chapter 00 -- Motivation \n", 19 | "\n", 20 | "Chapter 01 -- Introduction \n", 21 | "\n", 22 | "Chapter 02 -- Data Structures \n", 23 | "\n", 24 | " list \n", 25 | "\n", 26 | " indexing \n", 27 | "\n", 28 | " tuple \n", 29 | "\n", 30 | " dictionary \n", 31 | "\n", 32 | " sequences \n", 33 | "\n", 34 | " set \n", 35 | "\n", 36 | " Resources\n", 37 | " \n", 38 | "Chapter 03 -- Data Types and Formatting \n", 39 | "\n", 40 | " Numerics \n", 41 | "\n", 42 | " Boolean \n", 43 | "\n", 44 | " Numerical Precision\n", 45 | "\n", 46 | " Strings \n", 47 | "\n", 48 | " String Slicing \n", 49 | "\n", 50 | " String Formatting \n", 51 | "\n", 52 | " Resources \n", 53 | "\n", 54 | "\n", 55 | "Chapter 04 -- Pandas, Part 1 \n", 56 | "\n", 57 | " Importing Packages\n", 58 | "\n", 59 | " Series \n", 60 | "\n", 61 | " DataFrames \n", 62 | "\n", 63 | " Read .csv files \n", 64 | "\n", 65 | " Inspection \n", 66 | "\n", 67 | " Missing Values Identification\n", 68 | "\n", 69 | " Missing Value Replacement \n", 70 | "\n", 71 | " Resources \n", 72 | " \n", 73 | "\n", 74 | "Chapter 05 -- Understanding Indexes\n", 75 | " \n", 76 | " Indices \n", 77 | "\n", 78 | " .iloc indexer \n", 79 | "\n", 80 | " Setting and resetting Indices\n", 81 | "\n", 82 | " .loc indexer \n", 83 | "\n", 84 | " Mixing .loc indexer with Boolean Operations \n", 85 | "\n", 86 | " Altering DataFrame values using the .loc indexer \n", 87 | "\n", 88 | " Conditionally Apply Values Based on Another Column Value \n", 89 | "\n", 90 | " .ix indexer \n", 91 | "\n", 92 | " Indexing Issues \n", 93 | "\n", 94 | " Resources \n", 95 | " \n", 96 | "\n", 97 | "Chapter 06 -- Hierarchical Indexing \n", 98 | "\n", 99 | " Multi Indexed Selection\n", 100 | "\n", 101 | " xs() method for Cross-sections\n", 102 | "\n", 103 | " Advanced Indexing with .loc Indexer\n", 104 | "\n", 105 | " Using Boolean Operators with .loc Indexer\n", 106 | "\n", 107 | " stack() and unstack() methods\n", 108 | "\n", 109 | " Resources\n", 110 | "\n", 
111 | "\n", 112 | "Chapter 07 -- Pandas, Part 2\n", 113 | "\n", 114 | " SAS Sort/Merge with by-groups\n", 115 | "\n", 116 | " Inner Join\n", 117 | "\n", 118 | " Right Outer Join\n", 119 | "\n", 120 | " Left Outer Join\n", 121 | "\n", 122 | " Full Outer Join\n", 123 | "\n", 124 | " Outer Join no Matched Keys\n", 125 | "\n", 126 | " Outer Join no Matched Keys in Right\n", 127 | "\n", 128 | " No Matched Keys in Left\n", 129 | "\n", 130 | " Many-to-Many Join\n", 131 | "\n", 132 | "\n", 133 | " GroupBy: Split-Apply-Combine Introduction\n", 134 | "\n", 135 | " Replace Missing Values with Group Mean\n", 136 | "\n", 137 | " FIRST.variable and LAST.variable Processing\n", 138 | "\n", 139 | " Resources\n", 140 | "\n", 141 | "Chapter 08 -- Date, Time, and Timedelta Objects\n", 142 | "\n", 143 | " String Literal Mapped to datetime timestamp\n", 144 | "\n", 145 | " date objects\n", 146 | "\n", 147 | " strftime() and strptime() methods\n", 148 | "\n", 149 | " dateutil.parser\n", 150 | "\n", 151 | " time objects\n", 152 | "\n", 153 | " timedelta objects\n", 154 | "\n", 155 | " Resources\n", 156 | "\n", 157 | "Chapter 09 -- Panda Time Series and Date Handling\n", 158 | "\n", 159 | " Creating and Manipulating a Fixed Frequency of Dates and Time Spans\n", 160 | "\n", 161 | " Time-Series Walk-through\n", 162 | "\n", 163 | " Returning Unique Levels of Categories\n", 164 | "\n", 165 | " Return a Row Using a Minimum Value\n", 166 | "\n", 167 | "\n", 168 | " Return a Row Using a Maximum Value\n", 169 | "\n", 170 | " Convert Time-Series from one Frequency to Another \n", 171 | "\n", 172 | " Plotting with bokeh\n", 173 | "\n", 174 | " Resources\n", 175 | "\n", 176 | "Chapter 10 -- GroupBy\n", 177 | "\n", 178 | " Setting Display Options\n", 179 | " \n", 180 | " Read 'pickled' DataFrame\n", 181 | "\n", 182 | " Create GroupBy Object\n", 183 | "\n", 184 | " GroupBy with Aggregations\n", 185 | "\n", 186 | " Understanding Binning\n", 187 | "\n", 188 | " Applying Functions to Groups\n", 189 | "\n", 
190 | " Applying Transformations to Groups\n", 191 | "\n", 192 | " Top/Bottom N Processing\n", 193 | "\n", 194 | " Resources\n", 195 | "\n", 196 | " Chapter 11 -- Panda Readers \n", 197 | "\n", 198 | " pd.read_csv(URL) method\n", 199 | "\n", 200 | " SQLAlchemy Under the Covers\n", 201 | "\n", 202 | " read_sql_table() method\n", 203 | "\n", 204 | " read_sql_query() method\n", 205 | "\n", 206 | " DataFrame.to_sql() method\n", 207 | "\n", 208 | " pd.read_sas() method\n", 209 | "\n", 210 | " Resources\n", 211 | "\n", 212 | "Chapter 12 -- Additional Data Handling\n", 213 | "\n", 214 | " Sort and Sort Sequences\n", 215 | "\n", 216 | " Drop/Keep Columns\n", 217 | "\n", 218 | " Rename Columns\n", 219 | "\n", 220 | " Find Duplicate Values\n", 221 | "\n", 222 | " Drop Duplicate Rows\n", 223 | "\n", 224 | " Extract Duplicate Values\n", 225 | "\n", 226 | " Add a New DataFrame Column\n", 227 | "\n", 228 | " Cast Strings to Float\n", 229 | "\n", 230 | " Concatenating DataFrames (Join)\n", 231 | "\n", 232 | " Crosstabs\n", 233 | "\n", 234 | " Sampling\n", 235 | "\n", 236 | " Binning Continuous Values\n", 237 | "\n", 238 | " Save to Disk ('pickling')\n", 239 | "\n", 240 | " Resources\n" 241 | ] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": {}, 246 | "source": [ 247 | "## Navigation\n", 248 | "\n", 249 | " Return to Chapter List " 250 | ] 251 | } 252 | ], 253 | "metadata": { 254 | "anaconda-cloud": {}, 255 | "kernelspec": { 256 | "display_name": "Python [Root]", 257 | "language": "python", 258 | "name": "Python [Root]" 259 | }, 260 | "language_info": { 261 | "codemirror_mode": { 262 | "name": "ipython", 263 | "version": 3 264 | }, 265 | "file_extension": ".py", 266 | "mimetype": "text/x-python", 267 | "name": "python", 268 | "nbconvert_exporter": "python", 269 | "pygments_lexer": "ipython3", 270 | "version": "3.5.2" 271 | } 272 | }, 273 | "nbformat": 4, 274 | "nbformat_minor": 0 275 | } 276 | 
-------------------------------------------------------------------------------- /Chapter 01 -- Introduction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Chapter 01--Introduction" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Why Python?\n", 15 | "\n", 16 | "There are plenty of substantive open source software projects out there for data scientists, so why choose Python? After all, there is R. R is a robust and well-supported language written initially by statisticians for statisticians. \n", 17 | "\n", 18 | "The view is not to promote one solution over the other. The goal is to illustrate how the addition of Python to a SAS user’s skill set can broaden one's range of capabilities. And besides, Bob Muenchen has already written, R for SAS and SPSS Users.\n", 19 | "\n", 20 | "Python has its heritage in scientific and technical computing domains and it has a compact syntax. The latter making for a relatively easy language to learn while the former means it scales to offer good performance with massive data volumes. This is one of the reasons why Google uses it so extensively and has developed an outstanding tutorial for programmers. See Google's Python Class here." 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "## A quick-start\n", 28 | "Another aspect both languages have in common is the wealth of information available on the web. \n", 29 | "\n", 30 | "You would think having a plethora of content makes learning a new language a straightforward proposition. However, at times I experienced information overload. As I worked through examples, I was not sure until a good investment of time if what I was learning was applicable to my objectives. \n", 31 | "\n", 32 | "Sure there is learning for learning’s sake. 
But not every tutorial or text I read was fruitful; however, most were. It was only after some time into this endeavor that I realized I needed a specific context for ingesting new information. \n", 33 | "\n", 34 | "Like most people, I want fast results. And like most SAS users, I have developed a mental model for data analysis focused on a series of iterable steps.\n", 35 | "\n", 36 | "What I was lacking was someone to identify both the content to utilize as well as the order in which it should be consumed. I wanted to initially invest time in just those topics that I needed before getting on with the task of data analysis. \n" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "## The Python for SAS Users Approach\n", 44 | "\n", 45 | "\n", 46 | "A philosophical word (or two) about the merits of Python and SAS as languages. From my perspective, it is simply a question of finding the right tool for the job. Both languages have advantages and disadvantages. And since they are programming languages, their designers had to make certain tradeoffs which can manifest themselves as features or quirks, depending on one’s perspective. \n", 47 | "\n", 48 | "The goal is to provide a quick start for users already familiar with the SAS language and enable them to become familiar with Python. The choice of which tool to utilize typically comes down to a combination of what you as a user are familiar with and the context of the problem being solved." 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "The approach taken is to introduce a concept(s) in Python with a description of how the program works followed by a code cell for the Python program. This is often followed by an example program in the language of SAS to present a compare and contrast approach. Not every Python example has an analog SAS example. 
\n", 56 | "\n", 57 | "The Python code examples will always be inside a code cell within this notebook. To make the examples easy to follow, where reasonable, I have written the SAS output to the log or captured output as .JPG files. All of the SAS code examples are located here in 7zip compressed format. You can download a copy of 7zip from here. \n", 58 | "\n", 59 | "The SAS language programs were written and verified with Version: 3.2.2.0 of the WPS Workbench for Windows. World Programming System offers a SAS language interpreter and can be reached at: https://worldprogramming.com/us/.\n", 60 | "\n", 61 | "This approach is illustrated by the next 3 cells below. The analog SAS program is called c1_Python_for_loop.sas" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "## Comparing Python and SAS Code Fragments\n", 69 | "\n", 70 | "The list of numbers contained inside the square brackets [ ] make up the elements in a Python list. In Python, a list is a data structure that holds an arbitrary collection of items. i is an integer used as the index for the for loop. product holds the integer value from \n", 71 | "the arithmetic assignment of product * i Finally, the print() \n", 72 | "method writes the output. The same program is written in SAS as shown below." 
73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 1, 78 | "metadata": { 79 | "collapsed": false 80 | }, 81 | "outputs": [ 82 | { 83 | "name": "stdout", 84 | "output_type": "stream", 85 | "text": [ 86 | "The product is: 4224\n" 87 | ] 88 | } 89 | ], 90 | "source": [ 91 | "numbers = [2, 4, 6, 8, 11]\n", 92 | "product = 1\n", 93 | "for i in numbers:\n", 94 | " product = product * i \n", 95 | "print('The product is:', product)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | " " 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": { 108 | "collapsed": false 109 | }, 110 | "source": [ 111 | "````\n", 112 | " /******************************************************/\n", 113 | " /* c01_python_for_loop.sas */\n", 114 | " /******************************************************/\n", 115 | " 4 data _null_;\n", 116 | " 5 \n", 117 | " 6 retain product 1;\n", 118 | " 7 do i = 2 to 8 by 2, 11;\n", 119 | " 8 product=product*i;\n", 120 | " 9 end;\n", 121 | " 10 \n", 122 | " 11 put 'The product is: ' product;\n", 123 | "\n", 124 | " The product is: 4224\n", 125 | "````" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | " " 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "metadata": {}, 138 | "source": [ 139 | "## Python Terminology\n", 140 | "\n", 141 | "Python permits an object-oriented programming model. SAS is a procedural programming language. These examples use a procedural programming model for Python given the goal is to map SAS\n", 142 | "programming constructs into Python. \n", 143 | "\n", 144 | "This object-oriented programming model provides a number of classes with objects \n", 145 | "being instances of the class. The Python program in the cell below illustrates the int class (integers). Variable x is an instance (object) of the int class. 
You can execute help(int) to read more.\n", 146 | "\n", 147 | "Objects are said to belong to a class. Variables that belong to a class or objects are, strictly \n", 148 | "speaking, referred to as fields. Objects have capabilities belonging to the class and are called \n", 149 | "methods(). \n", 150 | "\n", 151 | "My early experience was that the object types I created were not always obvious from the code context. I needed to know what type of object was being created. The type() method returns the object's type as illustrated in the cell below.\n", 152 | "\n", 153 | "Python has a number of built-in functions and types that are always available which are documented here. Later on, we will see how Python expands its capabilities through importing packages (libraries)." 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 2, 159 | "metadata": { 160 | "collapsed": false 161 | }, 162 | "outputs": [ 163 | { 164 | "name": "stdout", 165 | "output_type": "stream", 166 | "text": [ 167 | "201\n", 168 | "\n" 169 | ] 170 | } 171 | ], 172 | "source": [ 173 | "x = 201;\n", 174 | "print(x)\n", 175 | "print(type(x))" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "## Object References\n", 183 | "\n", 184 | "Consider the program in the cell below. Don't worry about the syntax for now. a_list is a Python list object. The a_list list appears to be copied to another list object using the assignment:\n", 185 | "\n", 186 | "````python\n", 187 | " b_list = a_list\n", 188 | "````\n", 189 | "It turns out that while a_list and b_list are equivalent, they both point to the same memory location. In other words, b_list is another reference to the same list object that a_list names, not an independent copy. \n", 190 | "\n", 191 | "The program statement:\n", 192 | "\n", 193 | "````python\n", 194 | " del a_list[0]\n", 195 | "````\n", 196 | "removes the first item from the a_list list object. 
When we print the list objects, you see how both have the first item removed. The effect is subtle and not one you will likely encounter a great deal, but it is certainly a subtlety to be aware of." 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": 3, 202 | "metadata": { 203 | "collapsed": false 204 | }, 205 | "outputs": [ 206 | { 207 | "name": "stdout", 208 | "output_type": "stream", 209 | "text": [ 210 | "a_list is: ['onyx', 'zebra', 'money', 'lemur']\n", 211 | "b_list is: ['onyx', 'zebra', 'money', 'lemur']\n" 212 | ] 213 | } 214 | ], 215 | "source": [ 216 | "a_list = ['elephant', 'onyx', 'zebra', 'money', 'lemur']\n", 217 | "\n", 218 | "# b_list is another name pointing to the same object\n", 219 | "b_list = a_list\n", 220 | "\n", 221 | "# remove the first item in a_list\n", 222 | "del a_list[0]\n", 223 | "\n", 224 | "# print both lists\n", 225 | "print('a_list is:', a_list)\n", 226 | "print('b_list is:', b_list)" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | " " 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": {}, 239 | "source": [ 240 | "## The Zen of Python, by Tim Peters" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 11, 246 | "metadata": { 247 | "collapsed": false 248 | }, 249 | "outputs": [ 250 | { 251 | "name": "stdout", 252 | "output_type": "stream", 253 | "text": [ 254 | "The Zen of Python, by Tim Peters\n", 255 | "\n", 256 | "Beautiful is better than ugly.\n", 257 | "Explicit is better than implicit.\n", 258 | "Simple is better than complex.\n", 259 | "Complex is better than complicated.\n", 260 | "Flat is better than nested.\n", 261 | "Sparse is better than dense.\n", 262 | "Readability counts.\n", 263 | "Special cases aren't special enough to break the rules.\n", 264 | "Although practicality beats purity.\n", 265 | "Errors should never pass silently.\n", 266 | "Unless explicitly silenced.\n", 267 | "In the face of 
ambiguity, refuse the temptation to guess.\n", 268 | "There should be one-- and preferably only one --obvious way to do it.\n", 269 | "Although that way may not be obvious at first unless you're Dutch.\n", 270 | "Now is better than never.\n", 271 | "Although never is often better than *right* now.\n", 272 | "If the implementation is hard to explain, it's a bad idea.\n", 273 | "If the implementation is easy to explain, it may be a good idea.\n", 274 | "Namespaces are one honking great idea -- let's do more of those!\n" 275 | ] 276 | } 277 | ], 278 | "source": [ 279 | "import this\n", 280 | "import codecs\n", 281 | "\n", 282 | "print(codecs.decode(this.s, 'rot-13'))" 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "metadata": {}, 288 | "source": [ 289 | " " 290 | ] 291 | }, 292 | { 293 | "cell_type": "markdown", 294 | "metadata": {}, 295 | "source": [ 296 | "## Legibility, Indentation, and Spelling Matter\n", 297 | "\n", 298 | "To quote Eric Raymond, \"A language that makes it hard to write elegant code makes it \n", 299 | "hard to write good code.\" From his essay, entitled, \"Why Python\", located at: \n", 300 | "\n", 301 | "The Python program in the cell below is the same as the one 6 cells above, with one exception. The line after the for block is **not** indented. This results in the interpreter raising the error:\n", 302 | "\n", 303 | "IndentationError: expected an indented block\n", 304 | "\n", 305 | "Once you get over the shock of how Python imposes the indentation requirements, you will come to see how this is an important feature used to create legible and easy-to-understand code. \n", 306 | "\n", 307 | "Notice also there appear to be no symbols used to end a program statement. The end-of-line character is used to end a Python statement. 
This also helps to enforce legibility by keeping each statement on a separate physical line.\n", 308 | "\n", 309 | "Coincidentally, like SAS, Python will also honor a semi-colon as an end of statement terminator. However, you rarely see this. That's because placing multiple statements on the same physical line is considered an affront to program legibility. " 310 | ] 311 | }, 312 | { 313 | "cell_type": "markdown", 314 | "metadata": {}, 315 | "source": [ 316 | " " 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": 4, 322 | "metadata": { 323 | "collapsed": false 324 | }, 325 | "outputs": [ 326 | { 327 | "ename": "IndentationError", 328 | "evalue": "expected an indented block (, line 6)", 329 | "output_type": "error", 330 | "traceback": [ 331 | "\u001b[1;36m File \u001b[1;32m\"\"\u001b[1;36m, line \u001b[1;32m6\u001b[0m\n\u001b[1;33m product = product * i\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mIndentationError\u001b[0m\u001b[1;31m:\u001b[0m expected an indented block\n" 332 | ] 333 | } 334 | ], 335 | "source": [ 336 | "numbers = [2, 4, 6, 8, 11]\n", 337 | "product = 1\n", 338 | "for i in numbers:\n", 339 | "product = product * i \n", 340 | "print('The product is:', product)\n" 341 | ] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "metadata": {}, 346 | "source": [ 347 | "## Line continuation symbol\n", 348 | "\n", 349 | "Should you find you have a line of code that needs to extend past the physical line (i.e. wrap), then use the backslash (\\\\). This causes the Python interpreter to ignore the physical end-of-line terminator on the current line and continue scanning for the next end-of-line terminator." 
350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": 5, 355 | "metadata": { 356 | "collapsed": false 357 | }, 358 | "outputs": [ 359 | { 360 | "name": "stdout", 361 | "output_type": "stream", 362 | "text": [ 363 | "The product is: 607711104\n" 364 | ] 365 | } 366 | ], 367 | "source": [ 368 | "numbers = [2, 4, 6, 8, 11, 13, 21, \\\n", 369 | " 17, 31]\n", 370 | "product = 1\n", 371 | "for i in numbers:\n", 372 | " product = product * i \n", 373 | "print('The product is:', product)" 374 | ] 375 | }, 376 | { 377 | "cell_type": "markdown", 378 | "metadata": {}, 379 | "source": [ 380 | " " 381 | ] 382 | }, 383 | { 384 | "cell_type": "markdown", 385 | "metadata": {}, 386 | "source": [ 387 | "## Spelling\n", 388 | "\n", 389 | "Of course, the incorrect spelling of keywords is a source of error. Unlike SAS, in Python, object names are case sensetive." 390 | ] 391 | }, 392 | { 393 | "cell_type": "code", 394 | "execution_count": 6, 395 | "metadata": { 396 | "collapsed": false 397 | }, 398 | "outputs": [ 399 | { 400 | "ename": "NameError", 401 | "evalue": "name 'y' is not defined", 402 | "output_type": "error", 403 | "traceback": [ 404 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 405 | "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", 406 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[0mY\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;36m201\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", 407 | "\u001b[1;31mNameError\u001b[0m: name 'y' is not defined" 408 | ] 409 | } 410 | ], 411 | "source": [ 412 | "Y = 201\n", 413 | "print(y)" 414 | ] 415 | }, 416 | { 417 | "cell_type": "markdown", 418 | "metadata": {}, 419 | "source": [ 420 | 
"SAS keywords and variable names are case insensetive. " 421 | ] 422 | }, 423 | { 424 | "cell_type": "markdown", 425 | "metadata": {}, 426 | "source": [ 427 | "````\n", 428 | " /******************************************************/\n", 429 | " /* c01_python_names_case_sensetive.sas */\n", 430 | " /******************************************************/\n", 431 | " 4 data _null_;\n", 432 | " 5 \n", 433 | " 6 X = 201;\n", 434 | " 7 put x ;\n", 435 | "\n", 436 | " 201\n", 437 | "````" 438 | ] 439 | }, 440 | { 441 | "cell_type": "markdown", 442 | "metadata": {}, 443 | "source": [ 444 | "Finally, a word about name choices. Names should be descriptive because more than likely you will be one who has to re-read and understand tomorrow the code you write today. As with any language, it is a good practice to avoid language keywords for object names." 445 | ] 446 | }, 447 | { 448 | "cell_type": "markdown", 449 | "metadata": {}, 450 | "source": [ 451 | "## Navigation\n", 452 | "\n", 453 | " Return to Chapter List " 454 | ] 455 | } 456 | ], 457 | "metadata": { 458 | "anaconda-cloud": {}, 459 | "kernelspec": { 460 | "display_name": "Python [Root]", 461 | "language": "python", 462 | "name": "Python [Root]" 463 | }, 464 | "language_info": { 465 | "codemirror_mode": { 466 | "name": "ipython", 467 | "version": 3 468 | }, 469 | "file_extension": ".py", 470 | "mimetype": "text/x-python", 471 | "name": "python", 472 | "nbconvert_exporter": "python", 473 | "pygments_lexer": "ipython3", 474 | "version": "3.5.2" 475 | } 476 | }, 477 | "nbformat": 4, 478 | "nbformat_minor": 0 479 | } 480 | -------------------------------------------------------------------------------- /Chapter 02 -- Data Structures.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Chapter 02 -- Data Structures" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 
| "source": [ 14 | "## Topics Covered:\n", 15 | "\n", 16 | "list \n", 17 | "\n", 18 | "indexing \n", 19 | "\n", 20 | "tuple \n", 21 | "\n", 22 | "dictionary \n", 23 | "\n", 24 | "sequences \n", 25 | "\n", 26 | "set \n", 27 | "\n", 28 | "Resources\n" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "This chapter briefly touches on Python's data structures. As the name suggests, data structures are containers for data and other objects. We introduce these objects since they form the building-blocks for other structures discussed in subsequent chapters.\n", 36 | "\n", 37 | "A cursory examination is needed, so a quick review is in order. Later, you will want to refer back to these since they aid the creation of DataFrames and other useful objects. DataFrames, which are built on top of Python and NumPy modules, are the predominant structures discussed from Chapter 4 onwards. " 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "Python has four main data structures. They are:\n", 45 | " \n", 46 | " 1. list\n", 47 | " 2. tuple\n", 48 | " 3. set\n", 49 | " 4. dictionary" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "## list\n", 57 | "\n", 58 | "A list contains an ordered collection of items. You determine the order. In Python, items in a list are separated by commas and enclosed in square brackets. Lists are mutable, meaning you can add, remove, or alter items. 
" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | " " 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 1, 71 | "metadata": { 72 | "collapsed": false 73 | }, 74 | "outputs": [ 75 | { 76 | "name": "stdout", 77 | "output_type": "stream", 78 | "text": [ 79 | "There are 5 items in \"a_list\"\n" 80 | ] 81 | } 82 | ], 83 | "source": [ 84 | "a_list = ['ale', 'lager', 'stout', 'hefeweizen', 'stout']\n", 85 | "print('There are', len(a_list), 'items in \"a_list\"')" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | " " 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "In the example below, the parameter:\n", 100 | "\n", 101 | " end=' '\n", 102 | "\n", 103 | "for the print() method suppresses the default new-line character (CR/LF)." 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | " " 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 2, 116 | "metadata": { 117 | "collapsed": false 118 | }, 119 | "outputs": [ 120 | { 121 | "name": "stdout", 122 | "output_type": "stream", 123 | "text": [ 124 | "A list of beers include:\n", 125 | "ale lager stout hefeweizen stout " 126 | ] 127 | } 128 | ], 129 | "source": [ 130 | "print(\"A list of beers include:\")\n", 131 | "for i in a_list:\n", 132 | " print(i, end=' ')" 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "metadata": {}, 138 | "source": [ 139 | "## Indexing\n", 140 | "\n", 141 | "Python provides indexing methods for a number of objects, including lists. SAS has a similiar construct \\_n\\_ for the Data Step. Both indexing methods act as keys providing access to an individual item (or set of items). 
In Python's case, the default start index position is 0, and for SAS it is 1.\n", 142 | "\n", 143 | "The SAS code example in the cell below is an imperfect analogy for a Python list, since the SAS logic uses a variable to hold the beer_type values. Nonetheless, each program illustrates access to an array 'item' by indexing.\n", 144 | "\n", 145 | "Chapter 05 -- Understanding Indexes has more details." 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "Python uses zero as its index start position, in contrast to SAS' \\_n\\_ which uses a start position of 1. Almost every example of SAS DO loops, by convention, uses a start position of 1." 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 3, 158 | "metadata": { 159 | "collapsed": false 160 | }, 161 | "outputs": [ 162 | { 163 | "name": "stdout", 164 | "output_type": "stream", 165 | "text": [ 166 | "Value for beer type is: ale\n" 167 | ] 168 | } 169 | ], 170 | "source": [ 171 | "print('Value for beer type is:', a_list[0])" 172 | ] 173 | }, 174 | { 175 | "cell_type": "markdown", 176 | "metadata": {}, 177 | "source": [ 178 | " " 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "````\n", 186 | " /******************************************************/\n", 187 | " /* c02_retrieve_list_item_by_index_position.sas */\n", 188 | " /******************************************************/\n", 189 | " 4 data beers;\n", 190 | " 5 length beer_type $ 10;\n", 191 | " 6 input beer_type $ @@;\n", 192 | " 7 \n", 193 | " 8 if _n_ = 1 then\n", 194 | " 9 put 'Value for beer type is: ' beer_type;\n", 195 | " 10 \n", 196 | " 11 list;\n", 197 | " 12 datalines;\n", 198 | "\n", 199 | " Value for beer type is: ale\n", 200 | " RULE: ----+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+-\n", 201 | " 13 ale lager stout hefeweizen stout \n", 202 | "```` " 203 | ] 204 | }, 205 | { 206 | 
"cell_type": "markdown", 207 | "metadata": {}, 208 | "source": [ 209 | " " 210 | ] 211 | }, 212 | { 213 | "cell_type": "markdown", 214 | "metadata": {}, 215 | "source": [ 216 | "The next two examples illustrate the .append() and .sort() attributes for a list. They produce no visible output, so use the print() method to view results." 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": 4, 222 | "metadata": { 223 | "collapsed": false 224 | }, 225 | "outputs": [], 226 | "source": [ 227 | "a_list.append('malt')" 228 | ] 229 | }, 230 | { 231 | "cell_type": "markdown", 232 | "metadata": {}, 233 | "source": [ 234 | " " 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 5, 240 | "metadata": { 241 | "collapsed": false 242 | }, 243 | "outputs": [], 244 | "source": [ 245 | "a_list.sort(reverse=True)" 246 | ] 247 | }, 248 | { 249 | "cell_type": "markdown", 250 | "metadata": {}, 251 | "source": [ 252 | " " 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": {}, 258 | "source": [ 259 | "A for statement used to interate along the items in the list. For statements are documented here." 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": 6, 265 | "metadata": { 266 | "collapsed": false 267 | }, 268 | "outputs": [ 269 | { 270 | "name": "stdout", 271 | "output_type": "stream", 272 | "text": [ 273 | "A list of beers include:\n", 274 | "stout stout malt lager hefeweizen ale " 275 | ] 276 | } 277 | ], 278 | "source": [ 279 | "print(\"A list of beers include:\")\n", 280 | "for i in a_list:\n", 281 | " print(i, end=' ')" 282 | ] 283 | }, 284 | { 285 | "cell_type": "markdown", 286 | "metadata": {}, 287 | "source": [ 288 | " " 289 | ] 290 | }, 291 | { 292 | "cell_type": "markdown", 293 | "metadata": {}, 294 | "source": [ 295 | "The .count() attribute returns the number of items in an object. Here it is used to return the number of occurrences of a value by having the list element as an argument. 
" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": 7, 301 | "metadata": { 302 | "collapsed": false 303 | }, 304 | "outputs": [ 305 | { 306 | "name": "stdout", 307 | "output_type": "stream", 308 | "text": [ 309 | "a_list count for stout is: 2\n" 310 | ] 311 | } 312 | ], 313 | "source": [ 314 | "print('a_list count for stout is:', a_list.count('stout'))" 315 | ] 316 | }, 317 | { 318 | "cell_type": "markdown", 319 | "metadata": {}, 320 | "source": [ 321 | " " 322 | ] 323 | }, 324 | { 325 | "cell_type": "markdown", 326 | "metadata": {}, 327 | "source": [ 328 | "The example below illustrates Python's flexibility. Since nearly everything in Python is an object, you can have a list containing other lists. The built-in len() method provides a method to count the number of items in a list." 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": 8, 334 | "metadata": { 335 | "collapsed": false 336 | }, 337 | "outputs": [ 338 | { 339 | "name": "stdout", 340 | "output_type": "stream", 341 | "text": [ 342 | "['ales', 23, ['stout', 'stout', 'malt', 'lager', 'hefeweizen', 'ale']]\n", 343 | "Item count for b_list is: 3\n" 344 | ] 345 | } 346 | ], 347 | "source": [ 348 | "b_list = ['ales', 23, a_list]\n", 349 | "print(b_list)\n", 350 | "print('Item count for b_list is:', len(b_list))" 351 | ] 352 | }, 353 | { 354 | "cell_type": "markdown", 355 | "metadata": {}, 356 | "source": [ 357 | " " 358 | ] 359 | }, 360 | { 361 | "cell_type": "markdown", 362 | "metadata": {}, 363 | "source": [ 364 | "## tuple\n", 365 | "\n", 366 | "A tuple is similar to a list, but unlike lists, are immutable. Tuples are defined by a list of comma-separated items inside a set of optional parentheses. Legibility of code demands their use, however.\n", 367 | "\n", 368 | "A common use case for tuples is where Python statements or user-defined functions can assume that the items will not change, for example the names of the months." 
369 | ] 370 | }, 371 | { 372 | "cell_type": "code", 373 | "execution_count": 9, 374 | "metadata": { 375 | "collapsed": false 376 | }, 377 | "outputs": [ 378 | { 379 | "name": "stdout", 380 | "output_type": "stream", 381 | "text": [ 382 | "The breakfast menu has: 5 items\n" 383 | ] 384 | } 385 | ], 386 | "source": [ 387 | "dishes = ('eggs', 'green ham', 'biscuits', 'grits', 'steak')\n", 388 | "print('The breakfast menu has:', len(dishes), 'items')" 389 | ] 390 | }, 391 | { 392 | "cell_type": "markdown", 393 | "metadata": {}, 394 | "source": [ 395 | " " 396 | ] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "execution_count": 10, 401 | "metadata": { 402 | "collapsed": false 403 | }, 404 | "outputs": [ 405 | { 406 | "name": "stdout", 407 | "output_type": "stream", 408 | "text": [ 409 | "pancakes cupcakes twinkies dishes " 410 | ] 411 | } 412 | ], 413 | "source": [ 414 | "more_dishes = ('pancakes', 'cupcakes', 'twinkies', 'dishes')\n", 415 | "for i in more_dishes:\n", 416 | " print(i, end=' ')" 417 | ] 418 | }, 419 | { 420 | "cell_type": "markdown", 421 | "metadata": {}, 422 | "source": [ 423 | " " 424 | ] 425 | }, 426 | { 427 | "cell_type": "markdown", 428 | "metadata": {}, 429 | "source": [ 430 | "The tuple within a tuple remains one that is indexed" 431 | ] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "execution_count": 11, 436 | "metadata": { 437 | "collapsed": false 438 | }, 439 | "outputs": [ 440 | { 441 | "data": { 442 | "text/plain": [ 443 | "4" 444 | ] 445 | }, 446 | "execution_count": 11, 447 | "metadata": {}, 448 | "output_type": "execute_result" 449 | } 450 | ], 451 | "source": [ 452 | "len(more_dishes)" 453 | ] 454 | }, 455 | { 456 | "cell_type": "markdown", 457 | "metadata": {}, 458 | "source": [ 459 | "How to count all of the items in the tuple. \n", 460 | " 1. get the number of items in the tuple: more_dishes. \n", 461 | " 2. subtract 1 since the 'tuple-within-a-tuple' item is not counted. \n", 462 | " 3. 
add the length of the tuple more_dishes starting at position [3]\n" 463 | ] 464 | }, 465 | { 466 | "cell_type": "markdown", 467 | "metadata": {}, 468 | "source": [ 469 | " " 470 | ] 471 | }, 472 | { 473 | "cell_type": "code", 474 | "execution_count": 12, 475 | "metadata": { 476 | "collapsed": false 477 | }, 478 | "outputs": [ 479 | { 480 | "name": "stdout", 481 | "output_type": "stream", 482 | "text": [ 483 | "The number of items in more_dishes is: 9\n" 484 | ] 485 | } 486 | ], 487 | "source": [ 488 | "print('The number of items in more_dishes is:', len(more_dishes)-1+len(more_dishes[3]))" 489 | ] 490 | }, 491 | { 492 | "cell_type": "markdown", 493 | "metadata": {}, 494 | "source": [ 495 | "## dictionary\n", 496 | "\n", 497 | "A dictionary provides a look-up method through key/value pairs. Keys must be unique. Keys must be immutable (hashable) objects such as strings, numbers, or tuples; a mutable object such as a list cannot be used as a key. Values, however, can be either mutable or immutable objects.\n", 498 | "\n", 499 | "Key/value pairs are specified as:\n", 500 | "\n", 501 | "x = {key1: value1, key2: value2, key_n: value_n} " 502 | ] 503 | }, 504 | { 505 | "cell_type": "markdown", 506 | "metadata": {}, 507 | "source": [ 508 | "Create a dictionary" 509 | ] 510 | }, 511 | { 512 | "cell_type": "markdown", 513 | "metadata": {}, 514 | "source": [ 515 | " " 516 | ] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "execution_count": 13, 521 | "metadata": { 522 | "collapsed": false 523 | }, 524 | "outputs": [ 525 | { 526 | "name": "stdout", 527 | "output_type": "stream", 528 | "text": [ 529 | "\n" 530 | ] 531 | } 532 | ], 533 | "source": [ 534 | "capital = {'Oregon' : 'Salem',\n", 535 | " 'Washington' : 'Olympia',\n", 536 | " 'California' : 'Sacrament',\n", 537 | " 'Nevada' : 'Carson City'\n", 538 | " }\n", 539 | "print(type(capital)) " 540 | ] 541 | }, 542 | { 543 | "cell_type": "markdown", 544 | "metadata": {}, 545 | "source": [ 546 | "Print a dictionary value by key" 547 | ] 548 | }, 549 | { 550 | "cell_type": "code", 551 | "execution_count": 14, 552 | 
"metadata": { 553 | "collapsed": false 554 | }, 555 | "outputs": [ 556 | { 557 | "name": "stdout", 558 | "output_type": "stream", 559 | "text": [ 560 | "The capital of Nevada is Carson City\n" 561 | ] 562 | } 563 | ], 564 | "source": [ 565 | "print('The capital of Nevada is', capital['Nevada'])" 566 | ] 567 | }, 568 | { 569 | "cell_type": "markdown", 570 | "metadata": {}, 571 | "source": [ 572 | " " 573 | ] 574 | }, 575 | { 576 | "cell_type": "markdown", 577 | "metadata": {}, 578 | "source": [ 579 | "Add a key/value pair to the dictionary" 580 | ] 581 | }, 582 | { 583 | "cell_type": "code", 584 | "execution_count": 15, 585 | "metadata": { 586 | "collapsed": false 587 | }, 588 | "outputs": [], 589 | "source": [ 590 | "capital['Colorado'] = 'Denver'" 591 | ] 592 | }, 593 | { 594 | "cell_type": "markdown", 595 | "metadata": {}, 596 | "source": [ 597 | " " 598 | ] 599 | }, 600 | { 601 | "cell_type": "markdown", 602 | "metadata": {}, 603 | "source": [ 604 | "Printing key/value pairs. String formatting is covered in Chapter 03 -- Data Types and Formatting" 605 | ] 606 | }, 607 | { 608 | "cell_type": "code", 609 | "execution_count": 16, 610 | "metadata": { 611 | "collapsed": false 612 | }, 613 | "outputs": [ 614 | { 615 | "name": "stdout", 616 | "output_type": "stream", 617 | "text": [ 618 | "Number of value/pairs in the dictionary capital is 5:\n" 619 | ] 620 | } 621 | ], 622 | "source": [ 623 | "print('Number of value/pairs in the dictionary capital is {}:'.format(len(capital)))" 624 | ] 625 | }, 626 | { 627 | "cell_type": "markdown", 628 | "metadata": {}, 629 | "source": [ 630 | " " 631 | ] 632 | }, 633 | { 634 | "cell_type": "markdown", 635 | "metadata": {}, 636 | "source": [ 637 | "Delete a value." 
638 | ] 639 | }, 640 | { 641 | "cell_type": "code", 642 | "execution_count": 17, 643 | "metadata": { 644 | "collapsed": true 645 | }, 646 | "outputs": [], 647 | "source": [ 648 | "del capital['California']" 649 | ] 650 | }, 651 | { 652 | "cell_type": "markdown", 653 | "metadata": {}, 654 | "source": [ 655 | " " 656 | ] 657 | }, 658 | { 659 | "cell_type": "markdown", 660 | "metadata": {}, 661 | "source": [ 662 | "Many Python operations are silent unless an error is raised. Check the length of the dictionary following the delete operation." 663 | ] 664 | }, 665 | { 666 | "cell_type": "code", 667 | "execution_count": 18, 668 | "metadata": { 669 | "collapsed": false 670 | }, 671 | "outputs": [ 672 | { 673 | "name": "stdout", 674 | "output_type": "stream", 675 | "text": [ 676 | "Number of value/pairs in the dictionary capital is 4:\n" 677 | ] 678 | } 679 | ], 680 | "source": [ 681 | "print('Number of value/pairs in the dictionary capital is {}:'.format(len(capital)))" 682 | ] 683 | }, 684 | { 685 | "cell_type": "markdown", 686 | "metadata": {}, 687 | "source": [ 688 | "## sequences\n", 689 | "\n", 690 | "There are three basic sequence types:\n", 691 | "\n", 692 | " list \n", 693 | " tuple \n", 694 | " range\n", 695 | " \n", 696 | "A feature for sequences is membership testing. Using operators such as in, not in, and concatenation, you can test for the presence or absence of values. \n", 697 | "\n", 698 | "Implicit in these examples are boolean values (True or False) being returned. Boolean operators are discussed in Chapter 03, Boolean operators.\n", 699 | "\n", 700 | "The precedence order for sequence operations is found here." 701 | ] 702 | }, 703 | { 704 | "cell_type": "markdown", 705 | "metadata": {}, 706 | "source": [ 707 | " " 708 | ] 709 | }, 710 | { 711 | "cell_type": "markdown", 712 | "metadata": {}, 713 | "source": [ 714 | "The example below does an item membership test for the a_list list object." 
715 | ] 716 | }, 717 | { 718 | "cell_type": "code", 719 | "execution_count": 19, 720 | "metadata": { 721 | "collapsed": false 722 | }, 723 | "outputs": [ 724 | { 725 | "name": "stdout", 726 | "output_type": "stream", 727 | "text": [ 728 | "found\n" 729 | ] 730 | } 731 | ], 732 | "source": [ 733 | "item = 'ale'\n", 734 | "if (item in a_list):\n", 735 | " print('found')\n", 736 | "else:\n", 737 | " print('not found')" 738 | ] 739 | }, 740 | { 741 | "cell_type": "markdown", 742 | "metadata": {}, 743 | "source": [ 744 | " " 745 | ] 746 | }, 747 | { 748 | "cell_type": "markdown", 749 | "metadata": {}, 750 | "source": [ 751 | "Item membership test in a tuple using the boolean and operator." 752 | ] 753 | }, 754 | { 755 | "cell_type": "code", 756 | "execution_count": 20, 757 | "metadata": { 758 | "collapsed": false 759 | }, 760 | "outputs": [ 761 | { 762 | "name": "stdout", 763 | "output_type": "stream", 764 | "text": [ 765 | "found\n" 766 | ] 767 | } 768 | ], 769 | "source": [ 770 | "item1 = 'eggs' \n", 771 | "item2 = 'cupcakes'\n", 772 | "if (item1 and item2 in more_dishes):\n", 773 | " print('found')\n", 774 | "else:\n", 775 | " print('not found')" 776 | ] 777 | }, 778 | { 779 | "cell_type": "markdown", 780 | "metadata": {}, 781 | "source": [ 782 | " " 783 | ] 784 | }, 785 | { 786 | "cell_type": "markdown", 787 | "metadata": {}, 788 | "source": [ 789 | "A for statement to iterate over the key value pairs in the 'capital' dictionary." 
790 | ] 791 | }, 792 | { 793 | "cell_type": "code", 794 | "execution_count": 21, 795 | "metadata": { 796 | "collapsed": false 797 | }, 798 | "outputs": [ 799 | { 800 | "name": "stdout", 801 | "output_type": "stream", 802 | "text": [ 803 | "The capital of Washington is Olympia\n", 804 | "The capital of Nevada is Carson City\n", 805 | "The capital of Colorado is Denver\n", 806 | "The capital of Oregon is Salem\n" 807 | ] 808 | } 809 | ], 810 | "source": [ 811 | "for state, capital in capital.items():\n", 812 | " print('The capital of {} is {}'.format(state, capital))" 813 | ] 814 | }, 815 | { 816 | "cell_type": "markdown", 817 | "metadata": {}, 818 | "source": [ 819 | " " 820 | ] 821 | }, 822 | { 823 | "cell_type": "markdown", 824 | "metadata": {}, 825 | "source": [ 826 | "if logic used for membership testing. The if/elif statements are documented here. " 827 | ] 828 | }, 829 | { 830 | "cell_type": "code", 831 | "execution_count": 22, 832 | "metadata": { 833 | "collapsed": false 834 | }, 835 | "outputs": [], 836 | "source": [ 837 | "if 'Nevada' in capital:\n", 838 | " print(\"Nevada's capital:\", capital['Nevada'])" 839 | ] 840 | }, 841 | { 842 | "cell_type": "markdown", 843 | "metadata": {}, 844 | "source": [ 845 | " " 846 | ] 847 | }, 848 | { 849 | "cell_type": "markdown", 850 | "metadata": {}, 851 | "source": [ 852 | "Using the boolean not operator for membership test" 853 | ] 854 | }, 855 | { 856 | "cell_type": "code", 857 | "execution_count": 23, 858 | "metadata": { 859 | "collapsed": false 860 | }, 861 | "outputs": [ 862 | { 863 | "data": { 864 | "text/plain": [ 865 | "True" 866 | ] 867 | }, 868 | "execution_count": 23, 869 | "metadata": {}, 870 | "output_type": "execute_result" 871 | } 872 | ], 873 | "source": [ 874 | "'California' not in capital" 875 | ] 876 | }, 877 | { 878 | "cell_type": "markdown", 879 | "metadata": {}, 880 | "source": [ 881 | "## set\n", 882 | "\n", 883 | "A set object is an unordered collection of distinct objects. 
They are used for membership testing, removing duplicates from a sequence, and computing mathematical operations such as intersection, union, difference, and symmetric differences. \n", 884 | "\n", 885 | "Another common use case is when the existence of an object is more important than order or obtaining a count of occurrences of items. " 886 | ] 887 | }, 888 | { 889 | "cell_type": "markdown", 890 | "metadata": {}, 891 | "source": [ 892 | " " 893 | ] 894 | }, 895 | { 896 | "cell_type": "markdown", 897 | "metadata": {}, 898 | "source": [ 899 | "Membership test using the in operator for a set " 900 | ] 901 | }, 902 | { 903 | "cell_type": "code", 904 | "execution_count": 24, 905 | "metadata": { 906 | "collapsed": false 907 | }, 908 | "outputs": [ 909 | { 910 | "data": { 911 | "text/plain": [ 912 | "False" 913 | ] 914 | }, 915 | "execution_count": 24, 916 | "metadata": {}, 917 | "output_type": "execute_result" 918 | } 919 | ], 920 | "source": [ 921 | "months1 = set(['January', 'February', 'March', 'April', 'May', 'June'])\n", 922 | "\n", 923 | "'Jan' in months1" 924 | ] 925 | }, 926 | { 927 | "cell_type": "markdown", 928 | "metadata": {}, 929 | "source": [ 930 | " " 931 | ] 932 | }, 933 | { 934 | "cell_type": "markdown", 935 | "metadata": {}, 936 | "source": [ 937 | "copy() method for sets" 938 | ] 939 | }, 940 | { 941 | "cell_type": "code", 942 | "execution_count": 25, 943 | "metadata": { 944 | "collapsed": false 945 | }, 946 | "outputs": [], 947 | "source": [ 948 | "months2 = months1.copy()" 949 | ] 950 | }, 951 | { 952 | "cell_type": "markdown", 953 | "metadata": {}, 954 | "source": [ 955 | " " 956 | ] 957 | }, 958 | { 959 | "cell_type": "markdown", 960 | "metadata": {}, 961 | "source": [ 962 | "The remove() method for sets to delete an item" 963 | ] 964 | }, 965 | { 966 | "cell_type": "code", 967 | "execution_count": 26, 968 | "metadata": { 969 | "collapsed": false 970 | }, 971 | "outputs": [], 972 | "source": [ 973 | "months1.remove('February')" 974 | ] 975 | }, 
976 | { 977 | "cell_type": "markdown", 978 | "metadata": {}, 979 | "source": [ 980 | " " 981 | ] 982 | }, 983 | { 984 | "cell_type": "markdown", 985 | "metadata": {}, 986 | "source": [ 987 | " " 988 | ] 989 | }, 990 | { 991 | "cell_type": "markdown", 992 | "metadata": {}, 993 | "source": [ 994 | "The add() method for adding an item to a set" 995 | ] 996 | }, 997 | { 998 | "cell_type": "code", 999 | "execution_count": 27, 1000 | "metadata": { 1001 | "collapsed": false 1002 | }, 1003 | "outputs": [], 1004 | "source": [ 1005 | "months2.add('July')" 1006 | ] 1007 | }, 1008 | { 1009 | "cell_type": "markdown", 1010 | "metadata": {}, 1011 | "source": [ 1012 | " " 1013 | ] 1014 | }, 1015 | { 1016 | "cell_type": "markdown", 1017 | "metadata": {}, 1018 | "source": [ 1019 | "The three examples above do not produce any visible output. The sets months1 and months2 are displayed below." 1020 | ] 1021 | }, 1022 | { 1023 | "cell_type": "code", 1024 | "execution_count": 28, 1025 | "metadata": { 1026 | "collapsed": false 1027 | }, 1028 | "outputs": [ 1029 | { 1030 | "name": "stdout", 1031 | "output_type": "stream", 1032 | "text": [ 1033 | "The set months1 contains: {'January', 'June', 'May', 'April', 'March'}\n", 1034 | "The set months2 contains: {'June', 'March', 'May', 'April', 'July', 'January', 'February'}\n" 1035 | ] 1036 | } 1037 | ], 1038 | "source": [ 1039 | "print('The set months1 contains:', months1)\n", 1040 | "print('The set months2 contains:', months2)" 1041 | ] 1042 | }, 1043 | { 1044 | "cell_type": "markdown", 1045 | "metadata": {}, 1046 | "source": [ 1047 | " " 1048 | ] 1049 | }, 1050 | { 1051 | "cell_type": "markdown", 1052 | "metadata": {}, 1053 | "source": [ 1054 | "Now we can find the intersection of the sets 'months1' and 'months2'" 1055 | ] 1056 | }, 1057 | { 1058 | "cell_type": "code", 1059 | "execution_count": 29, 1060 | "metadata": { 1061 | "collapsed": false 1062 | }, 1063 | "outputs": [ 1064 | { 1065 | "data": { 1066 | "text/plain": [ 1067 | "{'April', 
'January', 'June', 'March', 'May'}" 1068 | ] 1069 | }, 1070 | "execution_count": 29, 1071 | "metadata": {}, 1072 | "output_type": "execute_result" 1073 | } 1074 | ], 1075 | "source": [ 1076 | "months1 & months2" 1077 | ] 1078 | }, 1079 | { 1080 | "cell_type": "markdown", 1081 | "metadata": {}, 1082 | "source": [ 1083 | " " 1084 | ] 1085 | }, 1086 | { 1087 | "cell_type": "markdown", 1088 | "metadata": {}, 1089 | "source": [ 1090 | "Test if the set months2 is a super-set of the set months1 " 1091 | ] 1092 | }, 1093 | { 1094 | "cell_type": "code", 1095 | "execution_count": 30, 1096 | "metadata": { 1097 | "collapsed": false 1098 | }, 1099 | "outputs": [ 1100 | { 1101 | "data": { 1102 | "text/plain": [ 1103 | "True" 1104 | ] 1105 | }, 1106 | "execution_count": 30, 1107 | "metadata": {}, 1108 | "output_type": "execute_result" 1109 | } 1110 | ], 1111 | "source": [ 1112 | "months2.issuperset(months1)" 1113 | ] 1114 | }, 1115 | { 1116 | "cell_type": "markdown", 1117 | "metadata": { 1118 | "collapsed": true 1119 | }, 1120 | "source": [ 1121 | "## Resources:\n", 1122 | " \n", 1123 | "v3.1.5 Documentation for Data Structures \n", 1124 | "\n", 1125 | " Python for Data Analysis The landscape of tutorials describing a range of resources across the web. While a bit dated, it is useful for getting started and becoming familiar with the wide variety of packages built on top of Python.\n", 1126 | "\n", 1127 | " A Byte of Python ...is a free book on programming using the Python language. It serves as a tutorial or guide to the Python language for a beginner audience. If all you know about computers is how to save text files, then this is the book for you.\n", 1128 | "\n", 1129 | " Python Numpy Tutorial An overview of NumPy by Justin Johnson." 
1130 | ] 1131 | }, 1132 | { 1133 | "cell_type": "markdown", 1134 | "metadata": {}, 1135 | "source": [ 1136 | "## Navigation\n", 1137 | "\n", 1138 | " Return to Chapter List " 1139 | ] 1140 | } 1141 | ], 1142 | "metadata": { 1143 | "anaconda-cloud": {}, 1144 | "kernelspec": { 1145 | "display_name": "Python [Root]", 1146 | "language": "python", 1147 | "name": "Python [Root]" 1148 | }, 1149 | "language_info": { 1150 | "codemirror_mode": { 1151 | "name": "ipython", 1152 | "version": 3 1153 | }, 1154 | "file_extension": ".py", 1155 | "mimetype": "text/x-python", 1156 | "name": "python", 1157 | "nbconvert_exporter": "python", 1158 | "pygments_lexer": "ipython3", 1159 | "version": "3.5.2" 1160 | } 1161 | }, 1162 | "nbformat": 4, 1163 | "nbformat_minor": 0 1164 | } 1165 | -------------------------------------------------------------------------------- /Chapter 08 -- Date, Time, and Timedelta Objects.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Chapter 08 -- Date, Time, and Timedelta Objects" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Topics Covered:\n", 15 | "\n", 16 | " String Literal Mapped to datetime timestamp \n", 17 | "\n", 18 | " date objects \n", 19 | "\n", 20 | " strftime() and strptime() methods \n", 21 | "\n", 22 | " dateutil.parser \n", 23 | "\n", 24 | " time objects \n", 25 | "\n", 26 | " timedelta objects \n", 27 | "\n", 28 | " Resources " 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | " " 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "\n", 43 | "Chapter 3, Data Types and Formatting briefly introduces Python date and datetimes. 
\n", 44 | "\n", 45 | "This chapter covers Python Date, Time, and Datetime objects and details for handling these objects as well as mapping strings to datetime values and the reverse operation. By understanding the lower-level behaviors of Date and Time objects, you will be able to grasp Chapter 9, Pandas Time Series Capabilities and Datetime Handling.\n", 46 | "\n", 47 | "Many time series have fixed interval or frequency, for example, home sales on a monthly basis or web log record processed every 15 seconds. Time series may also have irregular frequencies, in which case you may need to standardize, reshape, or change frequencies." 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": { 53 | "collapsed": true 54 | }, 55 | "source": [ 56 | "## Definitions\n", 57 | "\n", 58 | " - Timestamps are instances in time\n", 59 | "\n", 60 | " - Periods such as the current month of current year\n", 61 | " \n", 62 | " - Intervals indicated by start and end timestamps\n", 63 | "\n", 64 | "Recall, you can always return an object's type with the type method:\n", 65 | "\n", 66 | " type()\n", 67 | " " 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "In Python the datetime class be broken down into 5 categories:\n", 75 | "\n", 76 | "The first 4 will be examined briefly.\n", 77 | "\n", 78 | "\n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | "\n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | "

Object Type	Description	SAS Analogs
date	Stores calendar date(year, month, & day)	SAS date value = ' 24Oct16 ' d
time	Stores time (hours, minutes, seconds, & microseconds)	SAS time value = ' 12:34:56 ' t
datetime	Stores date & time together	SAS datetime value = ' 14Oct16:12:34:56 ' dt
timedelta	The difference between two datetime values	SAS datetime interval functions
tzinfo	Handles timezone related issues	Not generally dealt with

" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | " " 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | " " 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 1, 131 | "metadata": { 132 | "collapsed": true 133 | }, 134 | "outputs": [], 135 | "source": [ 136 | "from datetime import date, time, datetime, timedelta\n", 137 | "import numpy as np\n", 138 | "import pandas as pd\n", 139 | "from pandas import Series, DataFrame, Index" 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": { 145 | "collapsed": true 146 | }, 147 | "source": [ 148 | "## String Literal Mapped to datetime timestamp" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "A timestamp is time value that represents a count of the number of seconds from the start of an epoch. This is similiar to SAS datetime values that represent an off-set from an epoch beginning at midnight. 
" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 2, 161 | "metadata": { 162 | "collapsed": false 163 | }, 164 | "outputs": [], 165 | "source": [ 166 | "pdt = pd.Timestamp('2016-10-24')" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 3, 172 | "metadata": { 173 | "collapsed": false 174 | }, 175 | "outputs": [ 176 | { 177 | "data": { 178 | "text/plain": [ 179 | "pandas.tslib.Timestamp" 180 | ] 181 | }, 182 | "execution_count": 3, 183 | "metadata": {}, 184 | "output_type": "execute_result" 185 | } 186 | ], 187 | "source": [ 188 | "type(pdt)" 189 | ] 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "metadata": {}, 194 | "source": [ 195 | "## date objects" 196 | ] 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "metadata": {}, 201 | "source": [ 202 | "The syntax for constructing a date object is:\n", 203 | "\n", 204 | " date(year = yyyy, month = mm, day = dd)\n", 205 | " \n", 206 | " Where: yyyy is an integer ranging from 1 to 9999 by default\n", 207 | " mm is an integer ranging from 1 to 12 inclusive\n", 208 | " dd is an integer ranging from 1 to the number of days in the month of the year\n", 209 | "\n", 210 | "The details for the date object are found here " 211 | ] 212 | }, 213 | { 214 | "cell_type": "markdown", 215 | "metadata": {}, 216 | "source": [ 217 | " " 218 | ] 219 | }, 220 | { 221 | "cell_type": "markdown", 222 | "metadata": {}, 223 | "source": [ 224 | "Construct an arbitrary date" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": 4, 230 | "metadata": { 231 | "collapsed": false 232 | }, 233 | "outputs": [ 234 | { 235 | "name": "stdout", 236 | "output_type": "stream", 237 | "text": [ 238 | "1776-07-04\n", 239 | "\n" 240 | ] 241 | } 242 | ], 243 | "source": [ 244 | "ind_day = date(1776, 7, 4)\n", 245 | "print(ind_day)\n", 246 | "print(type(ind_day))" 247 | ] 248 | }, 249 | { 250 | "cell_type": "markdown", 251 | "metadata": {}, 252 | "source": [ 253 | " " 254 | ] 
255 | }, 256 | { 257 | "cell_type": "markdown", 258 | "metadata": {}, 259 | "source": [ 260 | "Return today's date. The SAS analog is the today() function." 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": 5, 266 | "metadata": { 267 | "collapsed": false 268 | }, 269 | "outputs": [ 270 | { 271 | "data": { 272 | "text/plain": [ 273 | "datetime.date(2016, 11, 13)" 274 | ] 275 | }, 276 | "execution_count": 5, 277 | "metadata": {}, 278 | "output_type": "execute_result" 279 | } 280 | ], 281 | "source": [ 282 | "date.today()" 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "metadata": {}, 288 | "source": [ 289 | " " 290 | ] 291 | }, 292 | { 293 | "cell_type": "markdown", 294 | "metadata": {}, 295 | "source": [ 296 | "The .year, .month, and .day attribute for datetime objects returns year, month, and day respectively. These attributes return integers. \n", 297 | "\n", 298 | "SAS has the analog functions, year(), month(), day(), used to return the respective portions of a SAS datetime value. These functions return numeric values. " 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": 6, 304 | "metadata": { 305 | "collapsed": false 306 | }, 307 | "outputs": [ 308 | { 309 | "name": "stdout", 310 | "output_type": "stream", 311 | "text": [ 312 | "Year: 2016\n", 313 | "Month: 11\n", 314 | "Day: 13\n" 315 | ] 316 | } 317 | ], 318 | "source": [ 319 | "print('Year:', date.today().year)\n", 320 | "print('Month:', date.today().month) \n", 321 | "print('Day:', date.today().day)" 322 | ] 323 | }, 324 | { 325 | "cell_type": "markdown", 326 | "metadata": {}, 327 | "source": [ 328 | "The analog SAS program for the examples in cells #3 to #6 is below." 
329 | ] 330 | }, 331 | { 332 | "cell_type": "markdown", 333 | "metadata": {}, 334 | "source": [ 335 | "````\n", 336 | " /******************************************************/\n", 337 | " /* c08_datetime_functions1.sas */\n", 338 | " /******************************************************/\n", 339 | " 37 data _null_;\n", 340 | " 38 \n", 341 | " 39 today = today();\n", 342 | " 40 ind_day = '04Jul1776'd;\n", 343 | " 41 \n", 344 | " 42 today_f = put(today,mmddyy10.);\n", 345 | " 43 put 'unformatted, today is: ' today /\n", 346 | " 44 'formatted with mmddyy10. , today is: ' today_f;\n", 347 | " 45 \n", 348 | " 46 m = month(today);\n", 349 | " 47 d = day(today);\n", 350 | " 48 y = year(today);\n", 351 | " 49 \n", 352 | " 50 put 'Month returns: ' m\n", 353 | " 51 ' Day returns: ' d\n", 354 | " 52 ' Year returns ' y;\n", 355 | "\n", 356 | " unformatted, today is: 20748\n", 357 | " formatted with mmddyy10. , today is: 10/21/2016\n", 358 | " Month returns: 10 Day returns: 21 Year returns 2016\n", 359 | "\n", 360 | "````" 361 | ] 362 | }, 363 | { 364 | "cell_type": "markdown", 365 | "metadata": {}, 366 | "source": [ 367 | " " 368 | ] 369 | }, 370 | { 371 | "cell_type": "markdown", 372 | "metadata": {}, 373 | "source": [ 374 | "The .year, .month, and .day attributes also work with arbitrary dates as well." 
375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": 7, 380 | "metadata": { 381 | "collapsed": false 382 | }, 383 | "outputs": [ 384 | { 385 | "name": "stdout", 386 | "output_type": "stream", 387 | "text": [ 388 | "Year: 1776 Month: 7 and day: 4\n" 389 | ] 390 | } 391 | ], 392 | "source": [ 393 | "print('Year:', ind_day.year, 'Month:', ind_day.month, 'and day:', ind_day.day) " 394 | ] 395 | }, 396 | { 397 | "cell_type": "markdown", 398 | "metadata": {}, 399 | "source": [ 400 | " " 401 | ] 402 | }, 403 | { 404 | "cell_type": "markdown", 405 | "metadata": {}, 406 | "source": [ 407 | "The weekday() method returns an integer representing day of the week where Monday is 0 and Sunday is 6. The SAS analog is the WEEKDAY() function that returns a numeric value where Sunday is 1 and Saturday is 7." 408 | ] 409 | }, 410 | { 411 | "cell_type": "code", 412 | "execution_count": 8, 413 | "metadata": { 414 | "collapsed": false 415 | }, 416 | "outputs": [ 417 | { 418 | "name": "stdout", 419 | "output_type": "stream", 420 | "text": [ 421 | "3\n" 422 | ] 423 | } 424 | ], 425 | "source": [ 426 | "print(date.weekday(ind_day))" 427 | ] 428 | }, 429 | { 430 | "cell_type": "markdown", 431 | "metadata": {}, 432 | "source": [ 433 | "## strftime() and strptime() methods" 434 | ] 435 | }, 436 | { 437 | "cell_type": "markdown", 438 | "metadata": {}, 439 | "source": [ 440 | "Like SAS, when working with datetime objects, formatting is needed to render dates into familiar date and time designations. In cell # 9 below, we render the constituent parts of the ind_day object created in cell #4 above. We also need formatting when creating datetime objects from string representations of datetime illustrated below.\n", 441 | "\n", 442 | "The strftime() method (string formatter for time) is used to create string representations of Python datetime objects. 
\n", 443 | "\n", 444 | "The strptime() method (string parser for time) is used to create datetime objects from string representations of datetime.\n", 445 | "\n", 446 | "The directives for formatting both are found here. \n", 447 | "\n", 448 | "SAS formats are analogs to the strftime() method. SAS informats are analogs to the strptime() method." 449 | ] 450 | }, 451 | { 452 | "cell_type": "markdown", 453 | "metadata": {}, 454 | "source": [ 455 | " " 456 | ] 457 | }, 458 | { 459 | "cell_type": "markdown", 460 | "metadata": {}, 461 | "source": [ 462 | "### strftime examples" 463 | ] 464 | }, 465 | { 466 | "cell_type": "code", 467 | "execution_count": 9, 468 | "metadata": { 469 | "collapsed": false 470 | }, 471 | "outputs": [ 472 | { 473 | "name": "stdout", 474 | "output_type": "stream", 475 | "text": [ 476 | "Day: Thursday\n", 477 | "The convential method to display the date is: Thu Jul 4 00:00:00 1776\n", 478 | "Another way to display the date is: Thursday July 04, 1776\n" 479 | ] 480 | } 481 | ], 482 | "source": [ 483 | "print('Day:', ind_day.strftime('%A'))\n", 484 | "print('The convential method to display the date is:', ind_day.strftime('%c'))\n", 485 | "print('Another way to display the date is:', ind_day.strftime('%A %B %d, %Y'))" 486 | ] 487 | }, 488 | { 489 | "cell_type": "markdown", 490 | "metadata": {}, 491 | "source": [ 492 | "The SAS example below illustrate these commonly used SAS formats:\n", 493 | " \n", 494 | "date11. \n", 495 | "\n", 496 | "ddmmyy10. \n", 497 | " \n", 498 | "weekdate17. \n", 499 | " \n", 500 | "yymmdd10. " 501 | ] 502 | }, 503 | { 504 | "cell_type": "markdown", 505 | "metadata": {}, 506 | "source": [ 507 | " " 508 | ] 509 | }, 510 | { 511 | "cell_type": "markdown", 512 | "metadata": {}, 513 | "source": [ 514 | "The strftime() method apply format directives to datetime objects. As illustrated above, these directives can be combined to form the desired output. 
This includes any arbitrary characters such as commas, slashes, or white-space needed to form the output. " 515 | ] 516 | }, 517 | { 518 | "cell_type": "code", 519 | "execution_count": 10, 520 | "metadata": { 521 | "collapsed": false 522 | }, 523 | "outputs": [ 524 | { 525 | "name": "stdout", 526 | "output_type": "stream", 527 | "text": [ 528 | "Last day is: 31-Dec-2016\n", 529 | "Last day is: 31/12/2016\n", 530 | "Last day is: Sat, Dec 31, 2016\n", 531 | "Last day is: 2016-12-31\n" 532 | ] 533 | } 534 | ], 535 | "source": [ 536 | "last_day = date(2016, 12, 31)\n", 537 | "print('Last day is:', last_day.strftime(\"%d-%b-%Y\")) # date11. format\n", 538 | "print('Last day is:', last_day.strftime(\"%d/%m/%Y\")) # ddmmyy10. format\n", 539 | "print('Last day is:', last_day.strftime(\"%a, %b %d, %Y\")) # weekdate17. format\n", 540 | "print('Last day is:', last_day.strftime(\"%Y-%m-%d\")) # yymmdd10. format" 541 | ] 542 | }, 543 | { 544 | "cell_type": "markdown", 545 | "metadata": {}, 546 | "source": [ 547 | " " 548 | ] 549 | }, 550 | { 551 | "cell_type": "markdown", 552 | "metadata": {}, 553 | "source": [ 554 | "The SAS program below illustrates formats to map datetime values into string representations, similar to the strftime() method." 
555 | ] 556 | }, 557 | { 558 | "cell_type": "markdown", 559 | "metadata": {}, 560 | "source": [ 561 | "````\n", 562 | " /******************************************************/\n", 563 | " /* c08_lastday_formats.sas */\n", 564 | " /******************************************************/\n", 565 | " 4 data _null_;\n", 566 | " 5 last_day = '31Dec16'd;\n", 567 | " 6 \n", 568 | " 7 ld_f1 = put(last_day, date11.);\n", 569 | " 8 ld_f2 = put(last_day, ddmmyy10.);\n", 570 | " 9 ld_f3 = put(last_day, weekdate17.);\n", 571 | " 10 ld_f4 = put(last_day, yymmdd10.);\n", 572 | " 11 \n", 573 | " 12 put 'Last day is: ' ld_f1;\n", 574 | " 13 put 'Last day is: ' ld_f2;\n", 575 | " 14 put 'Last day is: ' ld_f3;\n", 576 | " 15 put 'Last day is: ' ld_f4;\n", 577 | "\n", 578 | " Last day is: 31-DEC-2016\n", 579 | " Last day is: 31/12/2016\n", 580 | " Last day is: Sat, Dec 31, 2016\n", 581 | " Last day is: 2016-12-31\n", 582 | "````" 583 | ] 584 | }, 585 | { 586 | "cell_type": "markdown", 587 | "metadata": {}, 588 | "source": [ 589 | " " 590 | ] 591 | }, 592 | { 593 | "cell_type": "markdown", 594 | "metadata": {}, 595 | "source": [ 596 | "The strptime() method is used to create or parse a datetime object from a string representation of datetime corresponding to the format string supplied. See the example below in cell #9. 
" 597 | ] 598 | }, 599 | { 600 | "cell_type": "code", 601 | "execution_count": 11, 602 | "metadata": { 603 | "collapsed": false 604 | }, 605 | "outputs": [ 606 | { 607 | "name": "stdout", 608 | "output_type": "stream", 609 | "text": [ 610 | "2016-01-01 00:00:00\n", 611 | "type for str_date is: \n", 612 | "type for first_date is: \n" 613 | ] 614 | } 615 | ], 616 | "source": [ 617 | "str_date = \"01/01/2016\"\n", 618 | "first_date = datetime.strptime(str_date, \"%d/%m/%Y\" )\n", 619 | "print(first_date)\n", 620 | "print('type for str_date is:', type(str_date))\n", 621 | "print('type for first_date is:', type(first_date))" 622 | ] 623 | }, 624 | { 625 | "cell_type": "markdown", 626 | "metadata": {}, 627 | "source": [ 628 | " " 629 | ] 630 | }, 631 | { 632 | "cell_type": "markdown", 633 | "metadata": {}, 634 | "source": [ 635 | "The SAS program below illustrates informats to map string representations of datetime into a SAS datetime value similiar to the strptime() method." 636 | ] 637 | }, 638 | { 639 | "cell_type": "markdown", 640 | "metadata": {}, 641 | "source": [ 642 | "````\n", 643 | " /******************************************************/\n", 644 | " /* c08_informat_string_to_datetime.sas */\n", 645 | " /******************************************************/\n", 646 | " 45 data _null_;\n", 647 | " 46 \n", 648 | " 47 str_date = \"01/01/2016\";\n", 649 | " 48 \n", 650 | " 49 first_date = input(str_date, ddmmyy10.);\n", 651 | " 50 \n", 652 | " 51 put first_date ddmmyy10.;\n", 653 | "\n", 654 | " 01/01/2016\n", 655 | "````" 656 | ] 657 | }, 658 | { 659 | "cell_type": "markdown", 660 | "metadata": {}, 661 | "source": [ 662 | "## dateutil.parser" 663 | ] 664 | }, 665 | { 666 | "cell_type": "markdown", 667 | "metadata": {}, 668 | "source": [ 669 | "It can be tedious to constantly write the directives used to control the strptime() method. A useful alternative is to use the third-party dateutil.parser. 
It can parse nearly any datetime string literal into a datetime object." 670 | ] 671 | }, 672 | { 673 | "cell_type": "code", 674 | "execution_count": 12, 675 | "metadata": { 676 | "collapsed": true 677 | }, 678 | "outputs": [], 679 | "source": [ 680 | "from dateutil.parser import parse" 681 | ] 682 | }, 683 | { 684 | "cell_type": "markdown", 685 | "metadata": {}, 686 | "source": [ 687 | " " 688 | ] 689 | }, 690 | { 691 | "cell_type": "markdown", 692 | "metadata": {}, 693 | "source": [ 694 | " " 695 | ] 696 | }, 697 | { 698 | "cell_type": "code", 699 | "execution_count": 13, 700 | "metadata": { 701 | "collapsed": false 702 | }, 703 | "outputs": [ 704 | { 705 | "data": { 706 | "text/plain": [ 707 | "datetime.datetime(2016, 1, 1, 0, 0)" 708 | ] 709 | }, 710 | "execution_count": 13, 711 | "metadata": {}, 712 | "output_type": "execute_result" 713 | } 714 | ], 715 | "source": [ 716 | "strt_yr = parse('2016/01/01')\n", 717 | "strt_yr" 718 | ] 719 | }, 720 | { 721 | "cell_type": "markdown", 722 | "metadata": {}, 723 | "source": [ 724 | " " 725 | ] 726 | }, 727 | { 728 | "cell_type": "markdown", 729 | "metadata": {}, 730 | "source": [ 731 | "### Basic date arithmetic illustrating the replace() method." 
732 | ] 733 | }, 734 | { 735 | "cell_type": "markdown", 736 | "metadata": {}, 737 | "source": [ 738 | " " 739 | ] 740 | }, 741 | { 742 | "cell_type": "code", 743 | "execution_count": 14, 744 | "metadata": { 745 | "collapsed": false 746 | }, 747 | "outputs": [ 748 | { 749 | "name": "stdout", 750 | "output_type": "stream", 751 | "text": [ 752 | "2015-12-31\n", 753 | "True\n" 754 | ] 755 | } 756 | ], 757 | "source": [ 758 | "d = date(2015, 12, 31)\n", 759 | "if d == date(2015, 12, 31):\n", 760 | " d2 = d.replace(day=25) == date(2015, 12, 25)\n", 761 | "print(d)\n", 762 | "print(d2)" 763 | ] 764 | }, 765 | { 766 | "cell_type": "markdown", 767 | "metadata": {}, 768 | "source": [ 769 | " " 770 | ] 771 | }, 772 | { 773 | "cell_type": "code", 774 | "execution_count": 15, 775 | "metadata": { 776 | "collapsed": false 777 | }, 778 | "outputs": [ 779 | { 780 | "name": "stdout", 781 | "output_type": "stream", 782 | "text": [ 783 | "Next birthday is: 2017-01-24\n", 784 | "72 days, 0:00:00\n", 785 | "\n" 786 | ] 787 | } 788 | ], 789 | "source": [ 790 | "today = date.today()\n", 791 | "\n", 792 | "birth_day = date(today.year, 1, 24)\n", 793 | "\n", 794 | "if birth_day < today:\n", 795 | " birth_day = birth_day.replace(year=today.year + 1)\n", 796 | "print('Next birthday is:', birth_day)\n", 797 | "\n", 798 | "days_until = abs(birth_day - today)\n", 799 | "print(days_until)\n", 800 | "print(type(days_until))" 801 | ] 802 | }, 803 | { 804 | "cell_type": "markdown", 805 | "metadata": {}, 806 | "source": [ 807 | " " 808 | ] 809 | }, 810 | { 811 | "cell_type": "markdown", 812 | "metadata": {}, 813 | "source": [ 814 | "The example above in cell #15 is another illustration of date arithmetic. The if condition determines whether the birthday has occurred this year (in this case, True) and, if True, uses the date.replace() method to increment to the next year. The syntax:\n", 815 | "\n", 816 | " days_until = abs(birth_day - today)\n", 817 | " \n", 818 | "returns a timedelta. 
As the name suggests, it is a duration period. Additional examples of working with timedelta objects are below." 819 | ] 820 | }, 821 | { 822 | "cell_type": "markdown", 823 | "metadata": {}, 824 | "source": [ 825 | "The analog SAS program for counting the number of days until the next birthday is below. \n", 826 | "\n", 827 | "In both the Python and SAS examples, had we added 365 days in the conditional branch of the program, the result would have been 94 days since 2016 is a leap year. This is a reminder that short-cuts like adding 365.25 to increment to the next year or adding 24 hours to increment to the following day are fraught with unintended consequences. That is why these functions and operators are available." 828 | ] 829 | }, 830 | { 831 | "cell_type": "markdown", 832 | "metadata": {}, 833 | "source": [ 834 | "````\n", 835 | " /******************************************************/\n", 836 | " /* c08_birthdate_calculation.sas */\n", 837 | " /******************************************************/\n", 838 | " 35 data _null_;\n", 839 | " 36 \n", 840 | " 37 today = today();\n", 841 | " 38 \n", 842 | " 39 birth_day = '24Jan16'd;\n", 843 | " 40 \n", 844 | " 41 if birth_day < today then do;\n", 845 | " 42 next_birth_day = intnx('year', birth_day, 1, 'sameday');\n", 846 | " 43 f_b_day = put(next_birth_day, mmddyy10.);\n", 847 | " 44 put 'Next birthday is: ' f_b_day;\n", 848 | " 45 end;\n", 849 | " 46 \n", 850 | " 47 days_until = abs(next_birth_day - today);\n", 851 | " 48 put days_until ' days';\n", 852 | "\n", 853 | " Next birthday is: 01/24/2017\n", 854 | " 95 days\n", 855 | "````" 856 | ] 857 | }, 858 | { 859 | "cell_type": "markdown", 860 | "metadata": {}, 861 | "source": [ 862 | "## time objects" 863 | ] 864 | }, 865 | { 866 | "cell_type": "markdown", 867 | "metadata": {}, 868 | "source": [ 869 | "The time object behaves similarly to the date object with respect to displaying and mapping string representations of time into time objects. 
The SAS analogs are similar as well. \n", 870 | "\n", 871 | "The syntax for constructing a time object is:\n", 872 | "\n", 873 | " time(hour=hh, minute=mm, second=ss, microsecond=ms)\n", 874 | " \n", 875 | " Where hh is 0 <= hour < 24\n", 876 | " mm is 0 <= minute < 60\n", 877 | " ss is 0 <= second < 60\n", 878 | " ms is 0 <= microsecond < 1000000\n", 879 | " \n", 880 | "The time object has two other parameters, tzinfo and fold, which are outside the scope of these examples. The details for the time object are found here ." 881 | ] 882 | }, 883 | { 884 | "cell_type": "markdown", 885 | "metadata": {}, 886 | "source": [ 887 | " " 888 | ] 889 | }, 890 | { 891 | "cell_type": "code", 892 | "execution_count": 16, 893 | "metadata": { 894 | "collapsed": false 895 | }, 896 | "outputs": [ 897 | { 898 | "name": "stdout", 899 | "output_type": "stream", 900 | "text": [ 901 | "12:34:56\n" 902 | ] 903 | }, 904 | { 905 | "data": { 906 | "text/plain": [ 907 | "datetime.time" 908 | ] 909 | }, 910 | "execution_count": 16, 911 | "metadata": {}, 912 | "output_type": "execute_result" 913 | } 914 | ], 915 | "source": [ 916 | "go_time = time(12, 34, 56)\n", 917 | "print(go_time)\n", 918 | "type(go_time)" 919 | ] 920 | }, 921 | { 922 | "cell_type": "markdown", 923 | "metadata": {}, 924 | "source": [ 925 | " " 926 | ] 927 | }, 928 | { 929 | "cell_type": "markdown", 930 | "metadata": {}, 931 | "source": [ 932 | "Get the current time of day. The SAS analog is the time() function." 
933 | ] 934 | }, 935 | { 936 | "cell_type": "code", 937 | "execution_count": 17, 938 | "metadata": { 939 | "collapsed": false 940 | }, 941 | "outputs": [ 942 | { 943 | "data": { 944 | "text/plain": [ 945 | "datetime.datetime(2016, 11, 13, 16, 9, 29, 148013)" 946 | ] 947 | }, 948 | "execution_count": 17, 949 | "metadata": {}, 950 | "output_type": "execute_result" 951 | } 952 | ], 953 | "source": [ 954 | "datetime.now()" 955 | ] 956 | }, 957 | { 958 | "cell_type": "markdown", 959 | "metadata": {}, 960 | "source": [ 961 | " " 962 | ] 963 | }, 964 | { 965 | "cell_type": "markdown", 966 | "metadata": {}, 967 | "source": [ 968 | " " 969 | ] 970 | }, 971 | { 972 | "cell_type": "code", 973 | "execution_count": 18, 974 | "metadata": { 975 | "collapsed": false 976 | }, 977 | "outputs": [ 978 | { 979 | "name": "stdout", 980 | "output_type": "stream", 981 | "text": [ 982 | "16\n", 983 | "9\n", 984 | "29\n" 985 | ] 986 | } 987 | ], 988 | "source": [ 989 | "print(datetime.now().hour)\n", 990 | "print(datetime.now().minute) \n", 991 | "print(datetime.now().second)" 992 | ] 993 | }, 994 | { 995 | "cell_type": "markdown", 996 | "metadata": {}, 997 | "source": [ 998 | " " 999 | ] 1000 | }, 1001 | { 1002 | "cell_type": "markdown", 1003 | "metadata": {}, 1004 | "source": [ 1005 | "The .hour, .minute, and .second attributes for datetime objects return the hour, minute, and second, respectively. These attributes return integers. \n", 1006 | "\n", 1007 | "SAS has the analog functions, hour(), minute(), second(), used to return the respective portions of a SAS datetime value. These functions return numeric values." 
1008 | ] 1009 | }, 1010 | { 1011 | "cell_type": "markdown", 1012 | "metadata": {}, 1013 | "source": [ 1014 | "````\n", 1015 | " /******************************************************/\n", 1016 | " /* c08_datetime_functions2.sas */\n", 1017 | " /******************************************************/\n", 1018 | " 21 data _null_;\n", 1019 | " 22 \n", 1020 | " 23 now = time();\n", 1021 | " 24 \n", 1022 | " 25 go_time = \"12:34:45\"t;\n", 1023 | " 26 \n", 1024 | " 27 now_f = put(now, time8.);\n", 1025 | " 28 put 'unformatted, now is ' now /\n", 1026 | " 29 'formatted with time8., now is: ' now_f;\n", 1027 | " 30 \n", 1028 | " 31 h = hour(now);\n", 1029 | " 32 m = minute(now);\n", 1030 | " 33 s = second(now);\n", 1031 | " 34 \n", 1032 | " 35 put 'hour returns: ' h\n", 1033 | " 36 ' minute returns: ' m\n", 1034 | " 37 ' second returns: ' s;\n", 1035 | "\n", 1036 | " unformatted, now is 57327.064\n", 1037 | " formatted with time8., now is: 15:55:27\n", 1038 | " hour returns: 15 minute returns: 55 second returns: 27.063999891\n", 1039 | "````" 1040 | ] 1041 | }, 1042 | { 1043 | "cell_type": "markdown", 1044 | "metadata": {}, 1045 | "source": [ 1046 | " " 1047 | ] 1048 | }, 1049 | { 1050 | "cell_type": "markdown", 1051 | "metadata": {}, 1052 | "source": [ 1053 | "The .hour, .minute, and .second attribute also work with arbitrary times." 
1054 | ] 1055 | }, 1056 | { 1057 | "cell_type": "code", 1058 | "execution_count": 19, 1059 | "metadata": { 1060 | "collapsed": false 1061 | }, 1062 | "outputs": [ 1063 | { 1064 | "name": "stdout", 1065 | "output_type": "stream", 1066 | "text": [ 1067 | "Hour: 12 Minutes: 34 and seconds: 56\n" 1068 | ] 1069 | } 1070 | ], 1071 | "source": [ 1072 | "print('Hour:', go_time.hour, 'Minutes:', go_time.minute, 'and seconds:', go_time.second)" 1073 | ] 1074 | }, 1075 | { 1076 | "cell_type": "markdown", 1077 | "metadata": {}, 1078 | "source": [ 1079 | " " 1080 | ] 1081 | }, 1082 | { 1083 | "cell_type": "markdown", 1084 | "metadata": {}, 1085 | "source": [ 1086 | "## timedelta objects" 1087 | ] 1088 | }, 1089 | { 1090 | "cell_type": "markdown", 1091 | "metadata": {}, 1092 | "source": [ 1093 | "As the name suggests, a timedelta object represents the difference between two dates or times. In other words, a duration.\n", 1094 | "\n", 1095 | "The details for the timedelta object are found here \n", 1096 | "\n", 1097 | "The SAS analogs are the various date interval functions such as:\n", 1098 | "\n", 1099 | "INTNX \n", 1100 | "\n", 1101 | "INTGET \n", 1102 | "\n", 1103 | " INTFIT \n", 1104 | "\n", 1105 | "and others that are similar." 1106 | ] 1107 | }, 1108 | { 1109 | "cell_type": "markdown", 1110 | "metadata": {}, 1111 | "source": [ 1112 | " " 1113 | ] 1114 | }, 1115 | { 1116 | "cell_type": "markdown", 1117 | "metadata": {}, 1118 | "source": [ 1119 | "Adding a timedelta object to a date or datetime object returns a new date or datetime object. The date method today() returns today's date, similar to the SAS function TODAY(); the datetime method now() returns the current date and time, similar to the SAS function DATETIME()." 
1120 | ] 1121 | }, 1122 | { 1123 | "cell_type": "code", 1124 | "execution_count": 20, 1125 | "metadata": { 1126 | "collapsed": true 1127 | }, 1128 | "outputs": [], 1129 | "source": [ 1130 | "today = date.today()\n", 1131 | "moment = datetime.now().time()\n", 1132 | "now = datetime.combine(today, moment)" 1133 | ] 1134 | }, 1135 | { 1136 | "cell_type": "markdown", 1137 | "metadata": {}, 1138 | "source": [ 1139 | " " 1140 | ] 1141 | }, 1142 | { 1143 | "cell_type": "code", 1144 | "execution_count": 21, 1145 | "metadata": { 1146 | "collapsed": false 1147 | }, 1148 | "outputs": [ 1149 | { 1150 | "name": "stdout", 1151 | "output_type": "stream", 1152 | "text": [ 1153 | "2016-11-13 16:09:29.487036 2016-11-13 16:09:29.487036\n" 1154 | ] 1155 | } 1156 | ], 1157 | "source": [ 1158 | "print(today, moment, now)" 1159 | ] 1160 | }, 1161 | { 1162 | "cell_type": "markdown", 1163 | "metadata": {}, 1164 | "source": [ 1165 | " " 1166 | ] 1167 | }, 1168 | { 1169 | "cell_type": "markdown", 1170 | "metadata": {}, 1171 | "source": [ 1172 | "What is 1000 days from now" 1173 | ] 1174 | }, 1175 | { 1176 | "cell_type": "code", 1177 | "execution_count": 22, 1178 | "metadata": { 1179 | "collapsed": false 1180 | }, 1181 | "outputs": [ 1182 | { 1183 | "data": { 1184 | "text/plain": [ 1185 | "datetime.date(2019, 8, 10)" 1186 | ] 1187 | }, 1188 | "execution_count": 22, 1189 | "metadata": {}, 1190 | "output_type": "execute_result" 1191 | } 1192 | ], 1193 | "source": [ 1194 | "today+timedelta(days=1000)" 1195 | ] 1196 | }, 1197 | { 1198 | "cell_type": "markdown", 1199 | "metadata": {}, 1200 | "source": [ 1201 | " " 1202 | ] 1203 | }, 1204 | { 1205 | "cell_type": "markdown", 1206 | "metadata": {}, 1207 | "source": [ 1208 | " " 1209 | ] 1210 | }, 1211 | { 1212 | "cell_type": "code", 1213 | "execution_count": 23, 1214 | "metadata": { 1215 | "collapsed": false 1216 | }, 1217 | "outputs": [ 1218 | { 1219 | "data": { 1220 | "text/plain": [ 1221 | "datetime.timedelta(1144)" 1222 | ] 1223 | }, 1224 | 
"execution_count": 23, 1225 | "metadata": {}, 1226 | "output_type": "execute_result" 1227 | } 1228 | ], 1229 | "source": [ 1230 | "future = date(2020, 1, 1)\n", 1231 | "days_until = future - today\n", 1232 | "days_until" 1233 | ] 1234 | }, 1235 | { 1236 | "cell_type": "markdown", 1237 | "metadata": {}, 1238 | "source": [ 1239 | "The SAS equivalent for timedelta object examples is below." 1240 | ] 1241 | }, 1242 | { 1243 | "cell_type": "markdown", 1244 | "metadata": {}, 1245 | "source": [ 1246 | "````\n", 1247 | " /******************************************************/\n", 1248 | " /* c08_timedelta_analog_example.sas */\n", 1249 | " /******************************************************/\n", 1250 | " 44 data _null_;\n", 1251 | " 45 \n", 1252 | " 46 today = today();\n", 1253 | " 47 t_f = put(today,mmddyy10.);\n", 1254 | " 48 put 'Today is: ' t_f;\n", 1255 | " 49 \n", 1256 | " 50 yesterday = intnx('day', today, -1);\n", 1257 | " 51 y_f = put(yesterday,mmddyy10.);\n", 1258 | " 52 put 'Yesterday was: ' y_f;\n", 1259 | " 53 \n", 1260 | " 54 thousand_days = intnx('day', today, 1000, 'e');\n", 1261 | " 55 td_f = put(thousand_days, mmddyy10.);\n", 1262 | " 56 put '1,000 days from now is: ' td_f;\n", 1263 | " 57 \n", 1264 | " 58 future = '01Jan2020'd;\n", 1265 | " 59 days_until = future - today;\n", 1266 | " 60 \n", 1267 | " 61 put days_until=;\n", 1268 | "\n", 1269 | " Today is: 10/24/2016\n", 1270 | " Yesterday was: 10/23/2016\n", 1271 | " 1,000 days from now is: 07/21/2019\n", 1272 | " days_until=1164\n", 1273 | "````" 1274 | ] 1275 | }, 1276 | { 1277 | "cell_type": "markdown", 1278 | "metadata": {}, 1279 | "source": [ 1280 | "## Resources\n", 1281 | "\n", 1282 | "Chapter 10, Time Series, \"Python for Data Analysis, by Wes McKinney, located here \n", 1283 | "\n", 1284 | " strftime and strptime behavior\n", 1285 | "\n", 1286 | " About Date and Time Intervals SAS 9.2 Language Reference: Concepts, Second Edition\n", 1287 | "\n", 1288 | "Land Surface Hydrology Group 
Computing Seminar, by Joe Hamman, University of Washington, located here ." 1289 | ] 1290 | }, 1291 | { 1292 | "cell_type": "markdown", 1293 | "metadata": {}, 1294 | "source": [ 1295 | " " 1296 | ] 1297 | }, 1298 | { 1299 | "cell_type": "markdown", 1300 | "metadata": {}, 1301 | "source": [ 1302 | "## Navigation\n", 1303 | "\n", 1304 | " Return to Chapter List " 1305 | ] 1306 | } 1307 | ], 1308 | "metadata": { 1309 | "anaconda-cloud": {}, 1310 | "kernelspec": { 1311 | "display_name": "Python [Root]", 1312 | "language": "python", 1313 | "name": "Python [Root]" 1314 | }, 1315 | "language_info": { 1316 | "codemirror_mode": { 1317 | "name": "ipython", 1318 | "version": 3 1319 | }, 1320 | "file_extension": ".py", 1321 | "mimetype": "text/x-python", 1322 | "name": "python", 1323 | "nbconvert_exporter": "python", 1324 | "pygments_lexer": "ipython3", 1325 | "version": "3.5.2" 1326 | } 1327 | }, 1328 | "nbformat": 4, 1329 | "nbformat_minor": 0 1330 | } 1331 | -------------------------------------------------------------------------------- /Data_Interchange_not_working_with_sas.df2sd.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Data Interchange Examples" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | " " 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": { 20 | "collapsed": true 21 | }, 22 | "source": [ 23 | " " 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 1, 29 | "metadata": {}, 30 | "outputs": [ 31 | { 32 | "name": "stdout", 33 | "output_type": "stream", 34 | "text": [ 35 | "\n", 36 | "INSTALLED VERSIONS\n", 37 | "------------------\n", 38 | "commit: None\n", 39 | "python: 3.6.3.final.0\n", 40 | "python-bits: 64\n", 41 | "OS: Linux\n", 42 | "OS-release: 3.10.0-693.11.1.el7.x86_64\n", 43 | "machine: x86_64\n", 44 | "processor: x86_64\n", 45 | 
"byteorder: little\n", 46 | "LC_ALL: None\n", 47 | "LANG: en_US.UTF-8\n", 48 | "LOCALE: en_US.UTF-8\n", 49 | "\n", 50 | "pandas: 0.20.3\n", 51 | "pytest: 3.2.1\n", 52 | "pip: 9.0.1\n", 53 | "setuptools: 38.2.4\n", 54 | "Cython: 0.26.1\n", 55 | "numpy: 1.13.3\n", 56 | "scipy: 0.19.1\n", 57 | "xarray: None\n", 58 | "IPython: 6.2.1\n", 59 | "sphinx: 1.6.3\n", 60 | "patsy: 0.4.1\n", 61 | "dateutil: 2.6.1\n", 62 | "pytz: 2017.2\n", 63 | "blosc: None\n", 64 | "bottleneck: 1.2.1\n", 65 | "tables: 3.4.2\n", 66 | "numexpr: 2.6.2\n", 67 | "feather: 0.4.0\n", 68 | "matplotlib: 2.1.0\n", 69 | "openpyxl: 2.4.8\n", 70 | "xlrd: 1.1.0\n", 71 | "xlwt: 1.3.0\n", 72 | "xlsxwriter: 1.0.2\n", 73 | "lxml: 4.1.0\n", 74 | "bs4: 4.6.0\n", 75 | "html5lib: 0.999999999\n", 76 | "sqlalchemy: 1.1.13\n", 77 | "pymysql: None\n", 78 | "psycopg2: None\n", 79 | "jinja2: 2.9.6\n", 80 | "s3fs: None\n", 81 | "pandas_gbq: None\n", 82 | "pandas_datareader: None\n" 83 | ] 84 | } 85 | ], 86 | "source": [ 87 | "import feather\n", 88 | "import pandas as pd\n", 89 | "import saspy\n", 90 | "pd.show_versions()" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 2, 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "name": "stdout", 100 | "output_type": "stream", 101 | "text": [ 102 | "Using SAS Config named: default\n", 103 | "SAS Connection established. 
Subprocess id is 4942\n", 104 | "\n" 105 | ] 106 | } 107 | ], 108 | "source": [ 109 | "sas = saspy.SASsession()" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | " " 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "Example from:\n", 124 | "https://github.com/sassoftware/saspy/blob/master/saspy_example_github.ipynb" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": {}, 130 | "source": [ 131 | " " 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 3, 137 | "metadata": { 138 | "collapsed": true 139 | }, 140 | "outputs": [], 141 | "source": [ 142 | "cars = sas.sasdata('cars', libref='sashelp')" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 4, 148 | "metadata": {}, 149 | "outputs": [ 150 | { 151 | "name": "stdout", 152 | "output_type": "stream", 153 | "text": [ 154 | "\n" 155 | ] 156 | } 157 | ], 158 | "source": [ 159 | " print(type(cars))" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | " " 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "SAS dataset SASHELP.CARS converted to panda dataframe car_df" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 5, 179 | "metadata": { 180 | "collapsed": true 181 | }, 182 | "outputs": [], 183 | "source": [ 184 | "car_df = cars.to_df()" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 6, 190 | "metadata": {}, 191 | "outputs": [ 192 | { 193 | "name": "stdout", 194 | "output_type": "stream", 195 | "text": [ 196 | "\n" 197 | ] 198 | } 199 | ], 200 | "source": [ 201 | " print(type(car_df))" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | " " 209 | ] 210 | }, 211 | { 212 | "cell_type": "markdown", 213 | "metadata": {}, 214 | "source": [ 215 | 
"panda dataframe car_df converted to SAS dataset WORK.CARS" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 7, 221 | "metadata": {}, 222 | "outputs": [], 223 | "source": [ 224 | "car_df2 = sas.df2sd(car_df, 'cars')" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": 8, 230 | "metadata": {}, 231 | "outputs": [ 232 | { 233 | "name": "stdout", 234 | "output_type": "stream", 235 | "text": [ 236 | "\n" 237 | ] 238 | } 239 | ], 240 | "source": [ 241 | " print(type(car_df2))" 242 | ] 243 | }, 244 | { 245 | "cell_type": "markdown", 246 | "metadata": {}, 247 | "source": [ 248 | " " 249 | ] 250 | }, 251 | { 252 | "cell_type": "markdown", 253 | "metadata": {}, 254 | "source": [ 255 | "Read a dataframe from disk with the feather library created previously from a R dataframe" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": 9, 261 | "metadata": { 262 | "collapsed": true 263 | }, 264 | "outputs": [], 265 | "source": [ 266 | "path = '/home/sas/notebook/r_staff.feather'\n", 267 | "pd_staff = feather.read_dataframe(path)" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": 10, 273 | "metadata": {}, 274 | "outputs": [ 275 | { 276 | "data": { 277 | "text/plain": [ 278 | "employee category\n", 279 | "salary float64\n", 280 | "startdate datetime64[ns]\n", 281 | "stringsASFactors bool\n", 282 | "dtype: object" 283 | ] 284 | }, 285 | "execution_count": 10, 286 | "metadata": {}, 287 | "output_type": "execute_result" 288 | } 289 | ], 290 | "source": [ 291 | "pd_staff.dtypes" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": 11, 297 | "metadata": {}, 298 | "outputs": [ 299 | { 300 | "name": "stdout", 301 | "output_type": "stream", 302 | "text": [ 303 | "\n" 304 | ] 305 | } 306 | ], 307 | "source": [ 308 | "print(type(pd_staff))" 309 | ] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": 12, 314 | "metadata": {}, 315 | "outputs": [ 316 | { 
317 | "name": "stdout", 318 | "output_type": "stream", 319 | "text": [ 320 | " employee salary startdate stringsASFactors\n", 321 | "0 Thomas Gunther 27500.0 2010-11-01 False\n", 322 | "1 Nicholas Harbinger 33900.0 2008-03-25 False\n", 323 | "2 Gisela Benito 28000.0 2007-03-14 False\n", 324 | "3 Herbert Rudelich 35000.0 2011-01-24 False\n", 325 | "4 Emily Sirignano 36350.0 2009-12-15 False\n", 326 | "5 Michael Morrison 40000.0 2009-10-10 False\n", 327 | "6 Jacqueline Onieda 36500.0 2015-01-02 False\n" 328 | ] 329 | } 330 | ], 331 | "source": [ 332 | "print(pd_staff)" 333 | ] 334 | }, 335 | { 336 | "cell_type": "markdown", 337 | "metadata": {}, 338 | "source": [ 339 | " " 340 | ] 341 | }, 342 | { 343 | "cell_type": "markdown", 344 | "metadata": {}, 345 | "source": [ 346 | " " 347 | ] 348 | }, 349 | { 350 | "cell_type": "markdown", 351 | "metadata": {}, 352 | "source": [ 353 | "The dataframe pd_staff appears acceptable to Python, but is not acceptable to the sas.df2sd call" 354 | ] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "execution_count": 13, 359 | "metadata": {}, 360 | "outputs": [ 361 | { 362 | "ename": "NotImplementedError", 363 | "evalue": "", 364 | "output_type": "error", 365 | "traceback": [ 366 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 367 | "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", 368 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0msd_staff\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msas\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdf2sd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpd_staff\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'staff'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 369 | "\u001b[0;32m/root/anaconda3/lib/python3.6/site-packages/saspy/sasbase.py\u001b[0m in \u001b[0;36mdf2sd\u001b[0;34m(self, df, table, libref, results)\u001b[0m\n\u001b[1;32m 618\u001b[0m 
\u001b[0;34m:\u001b[0m\u001b[0;32mreturn\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mSASdata\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 619\u001b[0m \"\"\"\n\u001b[0;32m--> 620\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdataframe2sasdata\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtable\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlibref\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresults\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 621\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 622\u001b[0m def dataframe2sasdata(self, df: 'pd.DataFrame', table: str = '_df', libref: str = '',\n", 370 | "\u001b[0;32m/root/anaconda3/lib/python3.6/site-packages/saspy/sasbase.py\u001b[0m in \u001b[0;36mdataframe2sasdata\u001b[0;34m(self, df, table, libref, results)\u001b[0m\n\u001b[1;32m 637\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 638\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 639\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_io\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdataframe2sasdata\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtable\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlibref\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 640\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 641\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtable\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlibref\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 371 | "\u001b[0;32m/root/anaconda3/lib/python3.6/site-packages/saspy/sasiostdio.py\u001b[0m in \u001b[0;36mdataframe2sasdata\u001b[0;34m(self, df, table, 
libref)\u001b[0m\n\u001b[1;32m 894\u001b[0m \u001b[0minput\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;34m\"'\"\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;34m\"'n \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 895\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtypes\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkind\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m'O'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'S'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'U'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'V'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 896\u001b[0;31m \u001b[0mcol_l\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'ignore'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 897\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcol_l\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 898\u001b[0m \u001b[0mcol_l\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m8\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 372 | "\u001b[0;32m/root/anaconda3/lib/python3.6/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36mmap\u001b[0;34m(self, arg, 
na_action)\u001b[0m\n\u001b[1;32m 2127\u001b[0m \u001b[0mvalues\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_values\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2128\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mna_action\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2129\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mNotImplementedError\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2130\u001b[0m \u001b[0mmap_f\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mlambda\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2131\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 373 | "\u001b[0;31mNotImplementedError\u001b[0m: " 374 | ] 375 | } 376 | ], 377 | "source": [ 378 | "sd_staff = sas.df2sd(pd_staff, 'staff')" 379 | ] 380 | }, 381 | { 382 | "cell_type": "markdown", 383 | "metadata": {}, 384 | "source": [ 385 | " " 386 | ] 387 | }, 388 | { 389 | "cell_type": "markdown", 390 | "metadata": {}, 391 | "source": [ 392 | "Copy the pd_staff dataframe with default deep=True" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": 14, 398 | "metadata": {}, 399 | "outputs": [ 400 | { 401 | "data": { 402 | "text/plain": [ 403 | "140465635419640" 404 | ] 405 | }, 406 | "execution_count": 14, 407 | "metadata": {}, 408 | "output_type": "execute_result" 409 | } 410 | ], 411 | "source": [ 412 | "copy_deep_true = pd_staff.copy()\n", 413 | "id(pd_staff)\n" 414 | ] 415 | }, 416 | { 417 | "cell_type": "code", 418 | "execution_count": 15, 419 | "metadata": {}, 420 | "outputs": [ 421 | { 422 | "name": "stdout", 423 | "output_type": "stream", 424 | 
"text": [ 425 | "\n" 426 | ] 427 | }, 428 | { 429 | "data": { 430 | "text/plain": [ 431 | "140464967919432" 432 | ] 433 | }, 434 | "execution_count": 15, 435 | "metadata": {}, 436 | "output_type": "execute_result" 437 | } 438 | ], 439 | "source": [ 440 | "print(type(copy_deep_true))\n", 441 | "id(copy_deep_true)" 442 | ] 443 | }, 444 | { 445 | "cell_type": "markdown", 446 | "metadata": {}, 447 | "source": [ 448 | " " 449 | ] 450 | }, 451 | { 452 | "cell_type": "markdown", 453 | "metadata": {}, 454 | "source": [ 455 | "Call the sas.df2sd method. The copied dataframe is not acceptable to this call." 456 | ] 457 | }, 458 | { 459 | "cell_type": "code", 460 | "execution_count": 16, 461 | "metadata": {}, 462 | "outputs": [ 463 | { 464 | "ename": "NotImplementedError", 465 | "evalue": "", 466 | "output_type": "error", 467 | "traceback": [ 468 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 469 | "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", 470 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mcdt\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msas\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdf2sd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcopy_deep_true\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'staff_t'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 471 | "\u001b[0;32m/root/anaconda3/lib/python3.6/site-packages/saspy/sasbase.py\u001b[0m in \u001b[0;36mdf2sd\u001b[0;34m(self, df, table, libref, results)\u001b[0m\n\u001b[1;32m 618\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;32mreturn\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mSASdata\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 619\u001b[0m \"\"\"\n\u001b[0;32m--> 620\u001b[0;31m \u001b[0;32mreturn\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdataframe2sasdata\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtable\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlibref\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresults\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 621\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 622\u001b[0m def dataframe2sasdata(self, df: 'pd.DataFrame', table: str = '_df', libref: str = '',\n", 472 | "\u001b[0;32m/root/anaconda3/lib/python3.6/site-packages/saspy/sasbase.py\u001b[0m in \u001b[0;36mdataframe2sasdata\u001b[0;34m(self, df, table, libref, results)\u001b[0m\n\u001b[1;32m 637\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 638\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 639\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_io\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdataframe2sasdata\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtable\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlibref\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 640\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 641\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtable\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlibref\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 473 | "\u001b[0;32m/root/anaconda3/lib/python3.6/site-packages/saspy/sasiostdio.py\u001b[0m in \u001b[0;36mdataframe2sasdata\u001b[0;34m(self, df, table, libref)\u001b[0m\n\u001b[1;32m 894\u001b[0m \u001b[0minput\u001b[0m \u001b[0;34m+=\u001b[0m 
\u001b[0;34m\"'\"\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;34m\"'n \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 895\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtypes\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkind\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m'O'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'S'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'U'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'V'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 896\u001b[0;31m \u001b[0mcol_l\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'ignore'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 897\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcol_l\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 898\u001b[0m \u001b[0mcol_l\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m8\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 474 | "\u001b[0;32m/root/anaconda3/lib/python3.6/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36mmap\u001b[0;34m(self, arg, na_action)\u001b[0m\n\u001b[1;32m 2127\u001b[0m \u001b[0mvalues\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_values\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2128\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mna_action\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2129\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mNotImplementedError\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2130\u001b[0m \u001b[0mmap_f\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mlambda\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2131\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 475 | "\u001b[0;31mNotImplementedError\u001b[0m: " 476 | ] 477 | } 478 | ], 479 | "source": [ 480 | "cdt = sas.df2sd(copy_deep_true, 'staff_t')" 481 | ] 482 | }, 483 | { 484 | "cell_type": "markdown", 485 | "metadata": {}, 486 | "source": [ 487 | " " 488 | ] 489 | }, 490 | { 491 | "cell_type": "markdown", 492 | "metadata": {}, 493 | "source": [ 494 | "Copy pd_staff operation with deep=false parameter" 495 | ] 496 | }, 497 | { 498 | "cell_type": "code", 499 | "execution_count": 17, 500 | "metadata": {}, 501 | "outputs": [ 502 | { 503 | "data": { 504 | "text/plain": [ 505 | "140465635419640" 506 | ] 507 | }, 508 | "execution_count": 17, 509 | "metadata": {}, 510 | "output_type": "execute_result" 511 | } 512 | ], 513 | "source": [ 514 | "copy_deep_false = pd_staff.copy(deep=False)\n", 515 | "id(pd_staff)" 516 | ] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "execution_count": 18, 521 | "metadata": {}, 522 | "outputs": [ 523 | { 524 | "name": "stdout", 525 | "output_type": "stream", 526 | "text": [ 527 | "\n" 528 | ] 529 | }, 530 | { 531 | "data": { 532 | "text/plain": [ 533 
| "140464967921000" 534 | ] 535 | }, 536 | "execution_count": 18, 537 | "metadata": {}, 538 | "output_type": "execute_result" 539 | } 540 | ], 541 | "source": [ 542 | "print(type(copy_deep_false))\n", 543 | "id(copy_deep_false)" 544 | ] 545 | }, 546 | { 547 | "cell_type": "markdown", 548 | "metadata": {}, 549 | "source": [ 550 | " " 551 | ] 552 | }, 553 | { 554 | "cell_type": "markdown", 555 | "metadata": {}, 556 | "source": [ 557 | "Call the sas.df2sd method. copy_deep_false dataframe is not acceptable." 558 | ] 559 | }, 560 | { 561 | "cell_type": "code", 562 | "execution_count": 19, 563 | "metadata": {}, 564 | "outputs": [ 565 | { 566 | "ename": "NotImplementedError", 567 | "evalue": "", 568 | "output_type": "error", 569 | "traceback": [ 570 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 571 | "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", 572 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mcdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msas\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdf2sd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcopy_deep_false\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'staff_f'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 573 | "\u001b[0;32m/root/anaconda3/lib/python3.6/site-packages/saspy/sasbase.py\u001b[0m in \u001b[0;36mdf2sd\u001b[0;34m(self, df, table, libref, results)\u001b[0m\n\u001b[1;32m 618\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;32mreturn\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mSASdata\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 619\u001b[0m \"\"\"\n\u001b[0;32m--> 620\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdataframe2sasdata\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtable\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mlibref\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresults\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 621\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 622\u001b[0m def dataframe2sasdata(self, df: 'pd.DataFrame', table: str = '_df', libref: str = '',\n", 574 | "\u001b[0;32m/root/anaconda3/lib/python3.6/site-packages/saspy/sasbase.py\u001b[0m in \u001b[0;36mdataframe2sasdata\u001b[0;34m(self, df, table, libref, results)\u001b[0m\n\u001b[1;32m 637\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 638\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 639\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_io\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdataframe2sasdata\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtable\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlibref\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 640\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 641\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtable\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlibref\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 575 | "\u001b[0;32m/root/anaconda3/lib/python3.6/site-packages/saspy/sasiostdio.py\u001b[0m in \u001b[0;36mdataframe2sasdata\u001b[0;34m(self, df, table, libref)\u001b[0m\n\u001b[1;32m 894\u001b[0m \u001b[0minput\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;34m\"'\"\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;34m\"'n \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 895\u001b[0m \u001b[0;32mif\u001b[0m 
\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtypes\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkind\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m'O'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'S'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'U'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'V'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 896\u001b[0;31m \u001b[0mcol_l\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'ignore'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 897\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcol_l\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 898\u001b[0m \u001b[0mcol_l\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m8\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 576 | "\u001b[0;32m/root/anaconda3/lib/python3.6/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36mmap\u001b[0;34m(self, arg, na_action)\u001b[0m\n\u001b[1;32m 2127\u001b[0m \u001b[0mvalues\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_values\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2128\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mna_action\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 
2129\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mNotImplementedError\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2130\u001b[0m \u001b[0mmap_f\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mlambda\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2131\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 577 | "\u001b[0;31mNotImplementedError\u001b[0m: " 578 | ] 579 | } 580 | ], 581 | "source": [ 582 | "cdf = sas.df2sd(copy_deep_false, 'staff_f')" 583 | ] 584 | }, 585 | { 586 | "cell_type": "markdown", 587 | "metadata": {}, 588 | "source": [ 589 | " " 590 | ] 591 | }, 592 | { 593 | "cell_type": "markdown", 594 | "metadata": {}, 595 | "source": [ 596 | "Create the medals dataframe with the read.csv method" 597 | ] 598 | }, 599 | { 600 | "cell_type": "code", 601 | "execution_count": 20, 602 | "metadata": { 603 | "collapsed": true 604 | }, 605 | "outputs": [], 606 | "source": [ 607 | "df_medals = pd.read_csv(\"http://winterolympicsmedals.com/medals.csv\")" 608 | ] 609 | }, 610 | { 611 | "cell_type": "code", 612 | "execution_count": 21, 613 | "metadata": {}, 614 | "outputs": [ 615 | { 616 | "data": { 617 | "text/html": [ 618 | "

\n", 619 | "\n", 632 | "\n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | "

	Year	City	Sport	Discipline	NOC	Event	Event gender	Medal
0	1924	Chamonix	Skating	Figure skating	AUT	individual	M	Silver
1	1924	Chamonix	Skating	Figure skating	AUT	individual	W	Gold
2	1924	Chamonix	Skating	Figure skating	AUT	pairs	X	Gold
3	1924	Chamonix	Bobsleigh	Bobsleigh	BEL	four-man	M	Bronze
4	1924	Chamonix	Ice Hockey	Ice Hockey	CAN	ice hockey	M	Gold

\n", 704 | "

" 705 | ], 706 | "text/plain": [ 707 | " Year City Sport Discipline NOC Event Event gender \\\n", 708 | "0 1924 Chamonix Skating Figure skating AUT individual M \n", 709 | "1 1924 Chamonix Skating Figure skating AUT individual W \n", 710 | "2 1924 Chamonix Skating Figure skating AUT pairs X \n", 711 | "3 1924 Chamonix Bobsleigh Bobsleigh BEL four-man M \n", 712 | "4 1924 Chamonix Ice Hockey Ice Hockey CAN ice hockey M \n", 713 | "\n", 714 | " Medal \n", 715 | "0 Silver \n", 716 | "1 Gold \n", 717 | "2 Gold \n", 718 | "3 Bronze \n", 719 | "4 Gold " 720 | ] 721 | }, 722 | "execution_count": 21, 723 | "metadata": {}, 724 | "output_type": "execute_result" 725 | } 726 | ], 727 | "source": [ 728 | "df_medals.head()" 729 | ] 730 | }, 731 | { 732 | "cell_type": "markdown", 733 | "metadata": {}, 734 | "source": [ 735 | " " 736 | ] 737 | }, 738 | { 739 | "cell_type": "markdown", 740 | "metadata": {}, 741 | "source": [ 742 | "Call the sas.df2sd method to create the SAS dataset WORK.medals2" 743 | ] 744 | }, 745 | { 746 | "cell_type": "code", 747 | "execution_count": 22, 748 | "metadata": { 749 | "collapsed": true 750 | }, 751 | "outputs": [], 752 | "source": [ 753 | "sd_medals = sas.df2sd(df_medals, 'medals2')" 754 | ] 755 | }, 756 | { 757 | "cell_type": "code", 758 | "execution_count": 23, 759 | "metadata": {}, 760 | "outputs": [ 761 | { 762 | "data": { 763 | "text/plain": [ 764 | "{'Attributes': Member Label1 \\\n", 765 | " 0 WORK.MEDALS2 Data Set Name \n", 766 | " 1 WORK.MEDALS2 Member Type \n", 767 | " 2 WORK.MEDALS2 Engine \n", 768 | " 3 WORK.MEDALS2 Created \n", 769 | " 4 WORK.MEDALS2 Last Modified \n", 770 | " 5 WORK.MEDALS2 Protection \n", 771 | " 6 WORK.MEDALS2 Data Set Type \n", 772 | " 7 WORK.MEDALS2 Label \n", 773 | " 8 WORK.MEDALS2 Data Representation \n", 774 | " 9 WORK.MEDALS2 Encoding \n", 775 | " \n", 776 | " cValue1 nValue1 \\\n", 777 | " 0 WORK.MEDALS2 NaN \n", 778 | " 1 DATA NaN \n", 779 | " 2 V9 NaN \n", 780 | " 3 01/14/2018 15:12:11 1.831562e+09 \n", 781
| " 4 01/14/2018 15:12:11 1.831562e+09 \n", 782 | " 5 NaN \n", 783 | " 6 NaN \n", 784 | " 7 NaN \n", 785 | " 8 SOLARIS_X86_64, LINUX_X86_64, ALPHA_TRU64, LIN... NaN \n", 786 | " 9 latin1 Western (ISO) NaN \n", 787 | " \n", 788 | " Label2 cValue2 nValue2 \n", 789 | " 0 Observations 2311 2311.0 \n", 790 | " 1 Variables 8 8.0 \n", 791 | " 2 Indexes 0 0.0 \n", 792 | " 3 Observation Length 96 96.0 \n", 793 | " 4 Deleted Observations 0 0.0 \n", 794 | " 5 Compressed NO NaN \n", 795 | " 6 Sorted NO NaN \n", 796 | " 7 0.0 \n", 797 | " 8 0.0 \n", 798 | " 9 0.0 ,\n", 799 | " 'Enginehost': Member Label1 \\\n", 800 | " 0 WORK.MEDALS2 Data Set Page Size \n", 801 | " 1 WORK.MEDALS2 Number of Data Set Pages \n", 802 | " 2 WORK.MEDALS2 First Data Page \n", 803 | " 3 WORK.MEDALS2 Max Obs per Page \n", 804 | " 4 WORK.MEDALS2 Obs in First Data Page \n", 805 | " 5 WORK.MEDALS2 Number of Data Set Repairs \n", 806 | " 6 WORK.MEDALS2 Filename \n", 807 | " 7 WORK.MEDALS2 Release Created \n", 808 | " 8 WORK.MEDALS2 Host Created \n", 809 | " 9 WORK.MEDALS2 Inode Number \n", 810 | " 10 WORK.MEDALS2 Access Permission \n", 811 | " 11 WORK.MEDALS2 Owner Name \n", 812 | " 12 WORK.MEDALS2 File Size \n", 813 | " 13 WORK.MEDALS2 File Size (bytes) \n", 814 | " \n", 815 | " cValue1 nValue1 \n", 816 | " 0 65536 65536.0 \n", 817 | " 1 4 4.0 \n", 818 | " 2 1 1.0 \n", 819 | " 3 681 681.0 \n", 820 | " 4 652 652.0 \n", 821 | " 5 0 0.0 \n", 822 | " 6 /home/sas/tmp/SAS_work41FB0000134E_localhost.l... 
NaN \n", 823 | " 7 9.0401M4 NaN \n", 824 | " 8 Linux NaN \n", 825 | " 9 149271914 149271914.0 \n", 826 | " 10 rw-rw-r-- NaN \n", 827 | " 11 sas NaN \n", 828 | " 12 320KB NaN \n", 829 | " 13 327680 327680.0 ,\n", 830 | " 'Variables': Member Num Variable Type Len Pos\n", 831 | " 0 WORK.MEDALS2 2 City Char 22 8\n", 832 | " 1 WORK.MEDALS2 4 Discipline Char 15 40\n", 833 | " 2 WORK.MEDALS2 6 Event Char 31 58\n", 834 | " 3 WORK.MEDALS2 7 Event gender Char 1 89\n", 835 | " 4 WORK.MEDALS2 8 Medal Char 6 90\n", 836 | " 5 WORK.MEDALS2 5 NOC Char 3 55\n", 837 | " 6 WORK.MEDALS2 3 Sport Char 10 30\n", 838 | " 7 WORK.MEDALS2 1 Year Num 8 0}" 839 | ] 840 | }, 841 | "execution_count": 23, 842 | "metadata": {}, 843 | "output_type": "execute_result" 844 | } 845 | ], 846 | "source": [ 847 | " sd_medals.contents()" 848 | ] 849 | }, 850 | { 851 | "cell_type": "code", 852 | "execution_count": 24, 853 | "metadata": {}, 854 | "outputs": [ 855 | { 856 | "data": { 857 | "text/plain": [ 858 | "Access Method = STDIO\n", 859 | "SAS Config name = default\n", 860 | "WORK Path = /home/sas/tmp/SAS_work41FB0000134E_localhost.localdomain/\n", 861 | "SAS Version = 9.04.01M4P11092016\n", 862 | "SASPy Version = 2.2.1\n", 863 | "Teach me SAS = False\n", 864 | "Batch = False\n", 865 | "Results = Pandas\n", 866 | "SAS Session Encoding = LATIN1\n", 867 | "Python Encoding value = utf-8" 868 | ] 869 | }, 870 | "execution_count": 24, 871 | "metadata": {}, 872 | "output_type": "execute_result" 873 | } 874 | ], 875 | "source": [ 876 | "sas" 877 | ] 878 | }, 879 | { 880 | "cell_type": "code", 881 | "execution_count": null, 882 | "metadata": { 883 | "collapsed": true 884 | }, 885 | "outputs": [], 886 | "source": [] 887 | } 888 | ], 889 | "metadata": { 890 | "kernelspec": { 891 | "display_name": "Python 3", 892 | "language": "python", 893 | "name": "python3" 894 | }, 895 | "language_info": { 896 | "codemirror_mode": { 897 | "name": "ipython", 898 | "version": 3 899 | }, 900 | "file_extension": ".py", 901 | 
"mimetype": "text/x-python", 902 | "name": "python", 903 | "nbconvert_exporter": "python", 904 | "pygments_lexer": "ipython3", 905 | "version": "3.6.3" 906 | } 907 | }, 908 | "nbformat": 4, 909 | "nbformat_minor": 2 910 | } 911 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python For SAS Users 2 | An Introduction to Python for SAS Users 3 | 4 | This repo holds my earliest work on mapping SAS syntax to Python. I maintain updated examples at: 5 | www.pythonforsasusers.com 6 | 7 | The goal for this content is to permit business users familiar with Base SAS programming to learn Python. The examples provided map SAS programming constructs and coding patterns into their Python equivalents. The primary focus is on pandas and data management issues related to analysis of data. 8 | 9 | If you are already familiar with Python you can skip the first three chapters. 
10 | 11 | Feedback is appreciated and can be sent to: 12 | 13 | tr dot betancourt at comcast dot net 14 | 15 | Randy Betancourt 16 | -------------------------------------------------------------------------------- /data/Duplicate_Timestamps.csv: -------------------------------------------------------------------------------- 1 | Date,Quantity 2 | 3/27/2011,1 3 | 3/27/2011,8 4 | 3/25/2012,15 5 | 3/25/2012,19 6 | 3/31/2013,106 7 | 3/31/2013,111 8 | 3/30/2014,479 9 | 3/30/2014,411 10 | 3/29/2015,253 11 | 3/29/2015,222 12 | 3/27/2016,427 13 | 3/27/2016,444 14 | 3/26/2017,4 15 | 3/26/2017,99 16 | 3/25/2018,690 17 | 3/25/2018,444 18 | -------------------------------------------------------------------------------- /data/February_2018.csv: -------------------------------------------------------------------------------- 1 | ID,Amount,Quantity 2 | 1042,$99.89 ,21 3 | 3311,$59.99 ,12 4 | 9846,12.99,25 5 | 2222,19.19,115 6 | 8931,$79.99 ,2 7 | -------------------------------------------------------------------------------- /data/February_2018.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RandyBetancourt/PythonForSASUsers/6695bb69239e03476ea420c1d895492a31350076/data/February_2018.xlsx -------------------------------------------------------------------------------- /data/January_2018.csv: -------------------------------------------------------------------------------- 1 | ID,Amount,Quantity 2 | 0044,$199.89 ,10 3 | 8731,$49.99 ,2 4 | 7846,129,45 5 | 1111,89.19,15 6 | 2231,$99.99 ,1 7 | -------------------------------------------------------------------------------- /data/January_2018.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RandyBetancourt/PythonForSASUsers/6695bb69239e03476ea420c1d895492a31350076/data/January_2018.xlsx -------------------------------------------------------------------------------- /data/Left.csv: 
-------------------------------------------------------------------------------- 1 | ID,Name,Gender,Dept 2 | 929,Gunter,M,Mfg 3 | 446,Harbinger,M,Mfg 4 | 228,Benito,F,Mfg 5 | 299,Rudelich,M,Sales 6 | 442,Sirignano,F,Admin 7 | 321,Morrison,M,Sales 8 | 321,Morrison,M,Sales 9 | 882,Onieda,F,Admin 10 | -------------------------------------------------------------------------------- /data/March_2018.csv: -------------------------------------------------------------------------------- 1 | ID,Amount,Quantity 2 | 0002,$79.89 ,43 3 | 2811,$19.99 ,19 4 | 8468,112.99,25 5 | 3333,129.99,11 6 | 9318,$69.99 ,12 7 | -------------------------------------------------------------------------------- /data/March_2018.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RandyBetancourt/PythonForSASUsers/6695bb69239e03476ea420c1d895492a31350076/data/March_2018.xlsx -------------------------------------------------------------------------------- /data/Right.csv: -------------------------------------------------------------------------------- 1 | ID,Salary 2 | 929,"45,650" 3 | 446,"51,290" 4 | 228,"62,000" 5 | 299,"39,800" 6 | 442,"44,345" 7 | 871,"70,000" 8 | -------------------------------------------------------------------------------- /data/Road-Accident-Safety-Data-Guide.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RandyBetancourt/PythonForSASUsers/6695bb69239e03476ea420c1d895492a31350076/data/Road-Accident-Safety-Data-Guide.xls -------------------------------------------------------------------------------- /data/messy_input.csv: -------------------------------------------------------------------------------- 1 | This Line is being used as a header,,,,, 2 | ,,,,, 3 | ID,Date,Amount,Quantity,Status, 4 | 0042,16-Oct-17,$23.99 ,123,Closed,Jansen 5 | 7731,15-Jan-17,$49.99 ,,Pending,Rho 6 | 8843,9-Mar-17,129,45,,Gupta 7 | 3013,12-Feb-17, 
,15,Pending,Harrison 8 | 4431,1-Jul-17,$99.99 ,1,Closed,Yang 9 | -------------------------------------------------------------------------------- /data/messy_input.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RandyBetancourt/PythonForSASUsers/6695bb69239e03476ea420c1d895492a31350076/data/messy_input.xlsx -------------------------------------------------------------------------------- /data/tickets.csv: -------------------------------------------------------------------------------- 1 | Area,Type,Gender,Stops,Tickets 2 | City,Moving,Female,101,99 3 | City,Non-Moving,Female,117,110 4 | City,Other,Female,26,23 5 | City,Moving,Male,33,16 6 | City,Non-Moving,Male,126,105 7 | City,Other,Male,17,5 8 | City,Moving,Unknown,43,37 9 | City,Non-Moving,Unknown,58,50 10 | City,Other,Unknown,63,38 11 | Rural,Moving,Female,93,92 12 | Rural,Non-Moving,Female,127,107 13 | Rural,Other,Female,104,95 14 | Rural,Moving,Male,147,127 15 | Rural,Non-Moving,Male,69,63 16 | Rural,Other,Male,50,46 17 | Rural,Moving,Unknown,63,60 18 | Rural,Non-Moving,Unknown,97,94 19 | Rural,Other,Unknown,6,3 20 | Suburbs,Moving,Female,376,374 21 | Suburbs,Non-Moving,Female,112,107 22 | Suburbs,Other,Female,269,250 23 | Suburbs,Moving,Male,12,9 24 | Suburbs,Non-Moving,Male,8,8 25 | Suburbs,Other,Male,6,3 26 | Suburbs,Moving,Unknown,24,20 27 | Suburbs,Non-Moving,Unknown,17,12 28 | Suburbs,Other,Unknown,126,116 29 | -------------------------------------------------------------------------------- /saspy_Module.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RandyBetancourt/PythonForSASUsers/6695bb69239e03476ea420c1d895492a31350076/saspy_Module.pdf --------------------------------------------------------------------------------