├── LICENSE
├── README.md
├── data
│   ├── macrodata.csv
│   ├── stock_px.csv
│   ├── tips.csv
│   └── 個股_類別.rar
├── jpgs
│   └── MyPicture1.jpg
└── notebooks
    ├── 0. Pandas入門介紹.ipynb
    ├── 1. Pandas - main classes and structure.ipynb
    ├── 2. Pandas - IO tools
    │   ├── 2. Pandas - IO tools.ipynb
    │   ├── ex1.csv
    │   ├── ex1.pickle
    │   ├── ex2.csv
    │   ├── ex3 - 1.csv
    │   ├── ex3.csv
    │   ├── ex4.csv
    │   ├── ex5-1.csv
    │   ├── ex5.csv
    │   ├── ex6-o.csv
    │   ├── ex6.csv
    │   ├── mta.xml
    │   ├── mydata.h5
    │   ├── test.xls
    │   └── treseries.csv
    ├── 3. Pandas - ETL tools.ipynb
    ├── 4. Pandas - plotting.ipynb
    ├── 5. Pandas - GroupBy.ipynb
    ├── 練習 - 股票資料彙整_YahooFinance - 問題.ipynb
    ├── 練習 - 股票資料彙整_YahooFinance - 解答.ipynb
    ├── 練習 - 股票資料彙整_Yahoo股市 - 問題.ipynb
    └── 練習 - 股票資料彙整_Yahoo股市 - 解答.ipynb
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU General Public License is a free, copyleft license for
11 | software and other kinds of works.
12 |
13 | The licenses for most software and other practical works are designed
14 | to take away your freedom to share and change the works. By contrast,
15 | the GNU General Public License is intended to guarantee your freedom to
16 | share and change all versions of a program--to make sure it remains free
17 | software for all its users. We, the Free Software Foundation, use the
18 | GNU General Public License for most of our software; it applies also to
19 | any other work released this way by its authors. You can apply it to
20 | your programs, too.
21 |
22 | When we speak of free software, we are referring to freedom, not
23 | price. Our General Public Licenses are designed to make sure that you
24 | have the freedom to distribute copies of free software (and charge for
25 | them if you wish), that you receive source code or can get it if you
26 | want it, that you can change the software or use pieces of it in new
27 | free programs, and that you know you can do these things.
28 |
29 | To protect your rights, we need to prevent others from denying you
30 | these rights or asking you to surrender the rights. Therefore, you have
31 | certain responsibilities if you distribute copies of the software, or if
32 | you modify it: responsibilities to respect the freedom of others.
33 |
34 | For example, if you distribute copies of such a program, whether
35 | gratis or for a fee, you must pass on to the recipients the same
36 | freedoms that you received. You must make sure that they, too, receive
37 | or can get the source code. And you must show them these terms so they
38 | know their rights.
39 |
40 | Developers that use the GNU GPL protect your rights with two steps:
41 | (1) assert copyright on the software, and (2) offer you this License
42 | giving you legal permission to copy, distribute and/or modify it.
43 |
44 | For the developers' and authors' protection, the GPL clearly explains
45 | that there is no warranty for this free software. For both users' and
46 | authors' sake, the GPL requires that modified versions be marked as
47 | changed, so that their problems will not be attributed erroneously to
48 | authors of previous versions.
49 |
50 | Some devices are designed to deny users access to install or run
51 | modified versions of the software inside them, although the manufacturer
52 | can do so. This is fundamentally incompatible with the aim of
53 | protecting users' freedom to change the software. The systematic
54 | pattern of such abuse occurs in the area of products for individuals to
55 | use, which is precisely where it is most unacceptable. Therefore, we
56 | have designed this version of the GPL to prohibit the practice for those
57 | products. If such problems arise substantially in other domains, we
58 | stand ready to extend this provision to those domains in future versions
59 | of the GPL, as needed to protect the freedom of users.
60 |
61 | Finally, every program is threatened constantly by software patents.
62 | States should not allow patents to restrict development and use of
63 | software on general-purpose computers, but in those that do, we wish to
64 | avoid the special danger that patents applied to a free program could
65 | make it effectively proprietary. To prevent this, the GPL assures that
66 | patents cannot be used to render the program non-free.
67 |
68 | The precise terms and conditions for copying, distribution and
69 | modification follow.
70 |
71 | TERMS AND CONDITIONS
72 |
73 | 0. Definitions.
74 |
75 | "This License" refers to version 3 of the GNU General Public License.
76 |
77 | "Copyright" also means copyright-like laws that apply to other kinds of
78 | works, such as semiconductor masks.
79 |
80 | "The Program" refers to any copyrightable work licensed under this
81 | License. Each licensee is addressed as "you". "Licensees" and
82 | "recipients" may be individuals or organizations.
83 |
84 | To "modify" a work means to copy from or adapt all or part of the work
85 | in a fashion requiring copyright permission, other than the making of an
86 | exact copy. The resulting work is called a "modified version" of the
87 | earlier work or a work "based on" the earlier work.
88 |
89 | A "covered work" means either the unmodified Program or a work based
90 | on the Program.
91 |
92 | To "propagate" a work means to do anything with it that, without
93 | permission, would make you directly or secondarily liable for
94 | infringement under applicable copyright law, except executing it on a
95 | computer or modifying a private copy. Propagation includes copying,
96 | distribution (with or without modification), making available to the
97 | public, and in some countries other activities as well.
98 |
99 | To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies. Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 |
103 | An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License. If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 |
112 | 1. Source Code.
113 |
114 | The "source code" for a work means the preferred form of the work
115 | for making modifications to it. "Object code" means any non-source
116 | form of a work.
117 |
118 | A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 |
123 | The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form. A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 |
134 | The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities. However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work. For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 |
147 | The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 |
151 | The Corresponding Source for a work in source code form is that
152 | same work.
153 |
154 | 2. Basic Permissions.
155 |
156 | All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met. This License explicitly affirms your unlimited
159 | permission to run the unmodified Program. The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work. This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 |
164 | You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force. You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright. Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 |
175 | Conveying under any other circumstances is permitted solely under
176 | the conditions stated below. Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 |
179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 |
181 | No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 |
187 | When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 |
195 | 4. Conveying Verbatim Copies.
196 |
197 | You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 |
205 | You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 |
208 | 5. Conveying Modified Source Versions.
209 |
210 | You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 |
214 | a) The work must carry prominent notices stating that you modified
215 | it, and giving a relevant date.
216 |
217 | b) The work must carry prominent notices stating that it is
218 | released under this License and any conditions added under section
219 | 7. This requirement modifies the requirement in section 4 to
220 | "keep intact all notices".
221 |
222 | c) You must license the entire work, as a whole, under this
223 | License to anyone who comes into possession of a copy. This
224 | License will therefore apply, along with any applicable section 7
225 | additional terms, to the whole of the work, and all its parts,
226 | regardless of how they are packaged. This License gives no
227 | permission to license the work in any other way, but it does not
228 | invalidate such permission if you have separately received it.
229 |
230 | d) If the work has interactive user interfaces, each must display
231 | Appropriate Legal Notices; however, if the Program has interactive
232 | interfaces that do not display Appropriate Legal Notices, your
233 | work need not make them do so.
234 |
235 | A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit. Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 |
245 | 6. Conveying Non-Source Forms.
246 |
247 | You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 |
252 | a) Convey the object code in, or embodied in, a physical product
253 | (including a physical distribution medium), accompanied by the
254 | Corresponding Source fixed on a durable physical medium
255 | customarily used for software interchange.
256 |
257 | b) Convey the object code in, or embodied in, a physical product
258 | (including a physical distribution medium), accompanied by a
259 | written offer, valid for at least three years and valid for as
260 | long as you offer spare parts or customer support for that product
261 | model, to give anyone who possesses the object code either (1) a
262 | copy of the Corresponding Source for all the software in the
263 | product that is covered by this License, on a durable physical
264 | medium customarily used for software interchange, for a price no
265 | more than your reasonable cost of physically performing this
266 | conveying of source, or (2) access to copy the
267 | Corresponding Source from a network server at no charge.
268 |
269 | c) Convey individual copies of the object code with a copy of the
270 | written offer to provide the Corresponding Source. This
271 | alternative is allowed only occasionally and noncommercially, and
272 | only if you received the object code with such an offer, in accord
273 | with subsection 6b.
274 |
275 | d) Convey the object code by offering access from a designated
276 | place (gratis or for a charge), and offer equivalent access to the
277 | Corresponding Source in the same way through the same place at no
278 | further charge. You need not require recipients to copy the
279 | Corresponding Source along with the object code. If the place to
280 | copy the object code is a network server, the Corresponding Source
281 | may be on a different server (operated by you or a third party)
282 | that supports equivalent copying facilities, provided you maintain
283 | clear directions next to the object code saying where to find the
284 | Corresponding Source. Regardless of what server hosts the
285 | Corresponding Source, you remain obligated to ensure that it is
286 | available for as long as needed to satisfy these requirements.
287 |
288 | e) Convey the object code using peer-to-peer transmission, provided
289 | you inform other peers where the object code and Corresponding
290 | Source of the work are being offered to the general public at no
291 | charge under subsection 6d.
292 |
293 | A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 |
297 | A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling. In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage. For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product. A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 |
310 | "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source. The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 |
318 | If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information. But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 |
329 | The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed. Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 |
337 | Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 |
343 | 7. Additional Terms.
344 |
345 | "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law. If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 |
354 | When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it. (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.) You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 |
361 | Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 |
365 | a) Disclaiming warranty or limiting liability differently from the
366 | terms of sections 15 and 16 of this License; or
367 |
368 | b) Requiring preservation of specified reasonable legal notices or
369 | author attributions in that material or in the Appropriate Legal
370 | Notices displayed by works containing it; or
371 |
372 | c) Prohibiting misrepresentation of the origin of that material, or
373 | requiring that modified versions of such material be marked in
374 | reasonable ways as different from the original version; or
375 |
376 | d) Limiting the use for publicity purposes of names of licensors or
377 | authors of the material; or
378 |
379 | e) Declining to grant rights under trademark law for use of some
380 | trade names, trademarks, or service marks; or
381 |
382 | f) Requiring indemnification of licensors and authors of that
383 | material by anyone who conveys the material (or modified versions of
384 | it) with contractual assumptions of liability to the recipient, for
385 | any liability that these contractual assumptions directly impose on
386 | those licensors and authors.
387 |
388 | All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10. If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term. If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 |
398 | If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 |
403 | Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 |
407 | 8. Termination.
408 |
409 | You may not propagate or modify a covered work except as expressly
410 | provided under this License. Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 |
415 | However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 |
422 | Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 |
429 | Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License. If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 |
435 | 9. Acceptance Not Required for Having Copies.
436 |
437 | You are not required to accept this License in order to receive or
438 | run a copy of the Program. Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance. However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work. These actions infringe copyright if you do
443 | not accept this License. Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 |
446 | 10. Automatic Licensing of Downstream Recipients.
447 |
448 | Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License. You are not responsible
451 | for enforcing compliance by third parties with this License.
452 |
453 | An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations. If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 |
463 | You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License. For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 |
471 | 11. Patents.
472 |
473 | A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based. The
475 | work thus licensed is called the contributor's "contributor version".
476 |
477 | A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version. For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 |
487 | Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 |
492 | In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement). To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 |
499 | If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients. "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 |
513 | If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 |
521 | A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License. You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 |
536 | Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 |
540 | 12. No Surrender of Others' Freedom.
541 |
542 | If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License. If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all. For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 |
552 | 13. Use with the GNU Affero General Public License.
553 |
554 | Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work. The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 |
563 | 14. Revised Versions of this License.
564 |
565 | The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time. Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 |
570 | Each version is given a distinguishing version number. If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation. If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 |
579 | If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 |
584 | Later license versions may give you additional or different
585 | permissions. However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 |
589 | 15. Disclaimer of Warranty.
590 |
591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 |
600 | 16. Limitation of Liability.
601 |
602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 |
612 | 17. Interpretation of Sections 15 and 16.
613 |
614 | If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 |
621 | END OF TERMS AND CONDITIONS
622 |
623 | How to Apply These Terms to Your New Programs
624 |
625 | If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 |
629 | To do so, attach the following notices to the program. It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 |
634 | Wei Lin / Introduction to Pandas
635 | Copyright (C) 2016 Wei Lin
636 |
637 | This program is free software: you can redistribute it and/or modify
638 | it under the terms of the GNU General Public License as published by
639 | the Free Software Foundation, either version 3 of the License, or
640 | (at your option) any later version.
641 |
642 | This program is distributed in the hope that it will be useful,
643 | but WITHOUT ANY WARRANTY; without even the implied warranty of
644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
645 | GNU General Public License for more details.
646 |
647 | You should have received a copy of the GNU General Public License
648 | along with this program. If not, see <http://www.gnu.org/licenses/>.
649 |
650 | Also add information on how to contact you by electronic and paper mail.
651 |
652 | If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 |
655 | Introduction to Pandas Copyright (C) 2016 Wei Lin
656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 | This is free software, and you are welcome to redistribute it
658 | under certain conditions; type `show c' for details.
659 |
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License. Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 |
664 | You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <http://www.gnu.org/licenses/>.
668 |
669 | The GNU General Public License does not permit incorporating your program
670 | into proprietary programs. If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library. If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License. But first, please read
674 | <http://www.gnu.org/philosophy/why-not-lgpl.html>.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | # Pandas 入門介紹
3 | Taichung.py
4 | 2016/3/12
5 | Wei Lin
6 | [Wei1234c@gmail.com](mailto:wei1234c@gmail.com)
7 |
8 | ## Books:
9 | [Python for Data Analysis](http://www.books.com.tw/products/F012771443)
10 |
11 |
12 | ## Videos:
13 | [Data analysis in Python with pandas - Wes McKinney (Pandas之父)](https://www.youtube.com/watch?v=w26x-z-BdWQ)
14 | [Analyzing data with Pandas - PyCon SE 2015](https://www.youtube.com/watch?v=kSM8S76qYz0)
15 | [Pandas From The Ground Up - PyCon 2015](https://www.youtube.com/watch?v=5JnMutdy6Fw) / [Brandon’s Pandas Tutorial](https://github.com/brandon-rhodes/pycon-pandas-tutorial)
16 | [Hands-on Data Analysis with Python - PyCon 2015](https://www.youtube.com/watch?v=L4Hbv4ugUWk&list=PLHJdMADCPuGQeXXvSJnXGNhvYoOwcXjUD&index=4)
17 |
18 | ## Documentation:
19 | [pandas documentation — API Reference](http://pandas.pydata.org/pandas-docs/stable/api.html)
20 |
21 | ## About Me
22 | - Wei Lin : [Wei1234c@gmail.com](mailto:Wei1234c@gmail.com)
23 | - A planner in private enterprises
24 | - Started learning Python in 2015
25 | - Interested in Data-Science and A.I.
26 |
27 | ## About this talk
28 | 1. [Pandas - main classes and structure](https://github.com/Wei1234c/Introduction_to_Pandas/blob/master/notebooks/1.%20Pandas%20-%20main%20classes%20and%20structure.ipynb)
29 | 2. [Pandas - I/O tools](https://github.com/Wei1234c/Introduction_to_Pandas/blob/master/notebooks/2.%20Pandas%20-%20IO%20tools/2.%20Pandas%20-%20IO%20tools.ipynb)
30 | 3. [Pandas - ETL tools](https://github.com/Wei1234c/Introduction_to_Pandas/blob/master/notebooks/3.%20Pandas%20-%20ETL%20tools.ipynb)
31 | 4. [Pandas - plotting](https://github.com/Wei1234c/Introduction_to_Pandas/blob/master/notebooks/4.%20Pandas%20-%20plotting.ipynb)
32 | 5. [Pandas - GroupBy](https://github.com/Wei1234c/Introduction_to_Pandas/blob/master/notebooks/5.%20Pandas%20-%20GroupBy.ipynb)
33 |
34 | ## Design of this talk
35 | 1. Theory and Practice
36 | 2. Learning a language
37 | 3. What if ...
38 |
--------------------------------------------------------------------------------
/data/macrodata.csv:
--------------------------------------------------------------------------------
1 | year,quarter,realgdp,realcons,realinv,realgovt,realdpi,cpi,m1,tbilrate,unemp,pop,infl,realint
2 | 1959.0,1.0,2710.349,1707.4,286.898,470.045,1886.9,28.98,139.7,2.82,5.8,177.146,0.0,0.0
3 | 1959.0,2.0,2778.801,1733.7,310.859,481.301,1919.7,29.15,141.7,3.08,5.1,177.83,2.34,0.74
4 | 1959.0,3.0,2775.488,1751.8,289.226,491.26,1916.4,29.35,140.5,3.82,5.3,178.657,2.74,1.09
5 | 1959.0,4.0,2785.204,1753.7,299.356,484.052,1931.3,29.37,140.0,4.33,5.6,179.386,0.27,4.06
6 | 1960.0,1.0,2847.699,1770.5,331.722,462.199,1955.5,29.54,139.6,3.5,5.2,180.007,2.31,1.19
7 | 1960.0,2.0,2834.39,1792.9,298.152,460.4,1966.1,29.55,140.2,2.68,5.2,180.671,0.14,2.55
8 | 1960.0,3.0,2839.022,1785.8,296.375,474.676,1967.8,29.75,140.9,2.36,5.6,181.528,2.7,-0.34
9 | 1960.0,4.0,2802.616,1788.2,259.764,476.434,1966.6,29.84,141.1,2.29,6.3,182.287,1.21,1.08
10 | 1961.0,1.0,2819.264,1787.7,266.405,475.854,1984.5,29.81,142.1,2.37,6.8,182.992,-0.4,2.77
11 | 1961.0,2.0,2872.005,1814.3,286.246,480.328,2014.4,29.92,142.9,2.29,7.0,183.691,1.47,0.81
12 | 1961.0,3.0,2918.419,1823.1,310.227,493.828,2041.9,29.98,144.1,2.32,6.8,184.524,0.8,1.52
13 | 1961.0,4.0,2977.83,1859.6,315.463,502.521,2082.0,30.04,145.2,2.6,6.2,185.242,0.8,1.8
14 | 1962.0,1.0,3031.241,1879.4,334.271,520.96,2101.7,30.21,146.4,2.73,5.6,185.874,2.26,0.47
15 | 1962.0,2.0,3064.709,1902.5,331.039,523.066,2125.2,30.22,146.5,2.78,5.5,186.538,0.13,2.65
16 | 1962.0,3.0,3093.047,1917.9,336.962,538.838,2137.0,30.38,146.7,2.78,5.6,187.323,2.11,0.67
17 | 1962.0,4.0,3100.563,1945.1,325.65,535.912,2154.6,30.44,148.3,2.87,5.5,188.013,0.79,2.08
18 | 1963.0,1.0,3141.087,1958.2,343.721,522.917,2172.5,30.48,149.7,2.9,5.8,188.58,0.53,2.38
19 | 1963.0,2.0,3180.447,1976.9,348.73,518.108,2193.1,30.69,151.3,3.03,5.7,189.242,2.75,0.29
20 | 1963.0,3.0,3240.332,2003.8,360.102,546.893,2217.9,30.75,152.6,3.38,5.5,190.028,0.78,2.6
21 | 1963.0,4.0,3264.967,2020.6,364.534,532.383,2254.6,30.94,153.7,3.52,5.6,190.668,2.46,1.06
22 | 1964.0,1.0,3338.246,2060.5,379.523,529.686,2299.6,30.95,154.8,3.51,5.5,191.245,0.13,3.38
23 | 1964.0,2.0,3376.587,2096.7,377.778,526.175,2362.1,31.02,156.8,3.47,5.2,191.889,0.9,2.57
24 | 1964.0,3.0,3422.469,2135.2,386.754,522.008,2392.7,31.12,159.2,3.53,5.0,192.631,1.29,2.25
25 | 1964.0,4.0,3431.957,2141.2,389.91,514.603,2420.4,31.28,160.7,3.76,5.0,193.223,2.05,1.71
26 | 1965.0,1.0,3516.251,2188.8,429.145,508.006,2447.4,31.38,162.0,3.93,4.9,193.709,1.28,2.65
27 | 1965.0,2.0,3563.96,2213.0,429.119,508.931,2474.5,31.58,163.1,3.84,4.7,194.303,2.54,1.3
28 | 1965.0,3.0,3636.285,2251.0,444.444,529.446,2542.6,31.65,166.0,3.93,4.4,194.997,0.89,3.04
29 | 1965.0,4.0,3724.014,2314.3,446.493,544.121,2594.1,31.88,169.1,4.35,4.1,195.539,2.9,1.46
30 | 1966.0,1.0,3815.423,2348.5,484.244,556.593,2618.4,32.28,171.8,4.62,3.9,195.999,4.99,-0.37
31 | 1966.0,2.0,3828.124,2354.5,475.408,571.371,2624.7,32.45,170.3,4.65,3.8,196.56,2.1,2.55
32 | 1966.0,3.0,3853.301,2381.5,470.697,594.514,2657.8,32.85,171.2,5.23,3.8,197.207,4.9,0.33
33 | 1966.0,4.0,3884.52,2391.4,472.957,599.528,2688.2,32.9,171.9,5.0,3.7,197.736,0.61,4.39
34 | 1967.0,1.0,3918.74,2405.3,460.007,640.682,2728.4,33.1,174.2,4.22,3.8,198.206,2.42,1.8
35 | 1967.0,2.0,3919.556,2438.1,440.393,631.43,2750.8,33.4,178.1,3.78,3.8,198.712,3.61,0.17
36 | 1967.0,3.0,3950.826,2450.6,453.033,641.504,2777.1,33.7,181.6,4.42,3.8,199.311,3.58,0.84
37 | 1967.0,4.0,3980.97,2465.7,462.834,640.234,2797.4,34.1,184.3,4.9,3.9,199.808,4.72,0.18
38 | 1968.0,1.0,4063.013,2524.6,472.907,651.378,2846.2,34.4,186.6,5.18,3.7,200.208,3.5,1.67
39 | 1968.0,2.0,4131.998,2563.3,492.026,646.145,2893.5,34.9,190.5,5.5,3.5,200.706,5.77,-0.28
40 | 1968.0,3.0,4160.267,2611.5,476.053,640.615,2899.3,35.3,194.0,5.21,3.5,201.29,4.56,0.65
41 | 1968.0,4.0,4178.293,2623.5,480.998,636.729,2918.4,35.7,198.7,5.85,3.4,201.76,4.51,1.34
42 | 1969.0,1.0,4244.1,2652.9,512.686,633.224,2923.4,36.3,200.7,6.08,3.4,202.161,6.67,-0.58
43 | 1969.0,2.0,4256.46,2669.8,508.601,623.16,2952.9,36.8,201.7,6.49,3.4,202.677,5.47,1.02
44 | 1969.0,3.0,4283.378,2682.7,520.36,623.613,3012.9,37.3,202.9,7.02,3.6,203.302,5.4,1.63
45 | 1969.0,4.0,4263.261,2704.1,492.334,606.9,3034.9,37.9,206.2,7.64,3.6,203.849,6.38,1.26
46 | 1970.0,1.0,4256.573,2720.7,476.925,594.888,3050.1,38.5,206.7,6.76,4.2,204.401,6.28,0.47
47 | 1970.0,2.0,4264.289,2733.2,478.419,576.257,3103.5,38.9,208.0,6.66,4.8,205.052,4.13,2.52
48 | 1970.0,3.0,4302.259,2757.1,486.594,567.743,3145.4,39.4,212.9,6.15,5.2,205.788,5.11,1.04
49 | 1970.0,4.0,4256.637,2749.6,458.406,564.666,3135.1,39.9,215.5,4.86,5.8,206.466,5.04,-0.18
50 | 1971.0,1.0,4374.016,2802.2,517.935,542.709,3197.3,40.1,220.0,3.65,5.9,207.065,2.0,1.65
51 | 1971.0,2.0,4398.829,2827.9,533.986,534.905,3245.3,40.6,224.9,4.76,5.9,207.661,4.96,-0.19
52 | 1971.0,3.0,4433.943,2850.4,541.01,532.646,3259.7,40.9,227.2,4.7,6.0,208.345,2.94,1.75
53 | 1971.0,4.0,4446.264,2897.8,524.085,516.14,3294.2,41.2,230.1,3.87,6.0,208.917,2.92,0.95
54 | 1972.0,1.0,4525.769,2936.5,561.147,518.192,3314.9,41.5,235.6,3.55,5.8,209.386,2.9,0.64
55 | 1972.0,2.0,4633.101,2992.6,595.495,526.473,3346.1,41.8,238.8,3.86,5.7,209.896,2.88,0.98
56 | 1972.0,3.0,4677.503,3038.8,603.97,498.116,3414.6,42.2,245.0,4.47,5.6,210.479,3.81,0.66
57 | 1972.0,4.0,4754.546,3110.1,607.104,496.54,3550.5,42.7,251.5,5.09,5.3,210.985,4.71,0.38
58 | 1973.0,1.0,4876.166,3167.0,645.654,504.838,3590.7,43.7,252.7,5.98,5.0,211.42,9.26,-3.28
59 | 1973.0,2.0,4932.571,3165.4,675.837,497.033,3626.2,44.2,257.5,7.19,4.9,211.909,4.55,2.64
60 | 1973.0,3.0,4906.252,3176.7,649.412,475.897,3644.4,45.6,259.0,8.06,4.8,212.475,12.47,-4.41
61 | 1973.0,4.0,4953.05,3167.4,674.253,476.174,3688.9,46.8,263.8,7.68,4.8,212.932,10.39,-2.71
62 | 1974.0,1.0,4909.617,3139.7,631.23,491.043,3632.3,48.1,267.2,7.8,5.1,213.361,10.96,-3.16
63 | 1974.0,2.0,4922.188,3150.6,628.102,490.177,3601.1,49.3,269.3,7.89,5.2,213.854,9.86,-1.96
64 | 1974.0,3.0,4873.52,3163.6,592.672,492.586,3612.4,51.0,272.3,8.16,5.6,214.451,13.56,-5.4
65 | 1974.0,4.0,4854.34,3117.3,598.306,496.176,3596.0,52.3,273.9,6.96,6.6,214.931,10.07,-3.11
66 | 1975.0,1.0,4795.295,3143.4,493.212,490.603,3581.9,53.0,276.2,5.53,8.2,215.353,5.32,0.22
67 | 1975.0,2.0,4831.942,3195.8,476.085,486.679,3749.3,54.0,283.7,5.57,8.9,215.973,7.48,-1.91
68 | 1975.0,3.0,4913.328,3241.4,516.402,498.836,3698.6,54.9,285.4,6.27,8.5,216.587,6.61,-0.34
69 | 1975.0,4.0,4977.511,3275.7,530.596,500.141,3736.0,55.8,288.4,5.26,8.3,217.095,6.5,-1.24
70 | 1976.0,1.0,5090.663,3341.2,585.541,495.568,3791.0,56.1,294.7,4.91,7.7,217.528,2.14,2.77
71 | 1976.0,2.0,5128.947,3371.8,610.513,494.532,3822.2,57.0,297.2,5.28,7.6,218.035,6.37,-1.09
72 | 1976.0,3.0,5154.072,3407.5,611.646,493.141,3856.7,57.9,302.0,5.05,7.7,218.644,6.27,-1.22
73 | 1976.0,4.0,5191.499,3451.8,615.898,494.415,3884.4,58.7,308.3,4.57,7.8,219.179,5.49,-0.92
74 | 1977.0,1.0,5251.762,3491.3,646.198,498.509,3887.5,60.0,316.0,4.6,7.5,219.684,8.76,-4.16
75 | 1977.0,2.0,5356.131,3510.6,696.141,506.695,3931.8,60.8,320.2,5.06,7.1,220.239,5.3,-0.24
76 | 1977.0,3.0,5451.921,3544.1,734.078,509.605,3990.8,61.6,326.4,5.82,6.9,220.904,5.23,0.59
77 | 1977.0,4.0,5450.793,3597.5,713.356,504.584,4071.2,62.7,334.4,6.2,6.6,221.477,7.08,-0.88
78 | 1978.0,1.0,5469.405,3618.5,727.504,506.314,4096.4,63.9,339.9,6.34,6.3,221.991,7.58,-1.24
79 | 1978.0,2.0,5684.569,3695.9,777.454,518.366,4143.4,65.5,347.6,6.72,6.0,222.585,9.89,-3.18
80 | 1978.0,3.0,5740.3,3711.4,801.452,520.199,4177.1,67.1,353.3,7.64,6.0,223.271,9.65,-2.01
81 | 1978.0,4.0,5816.222,3741.3,819.689,524.782,4209.8,68.5,358.6,9.02,5.9,223.865,8.26,0.76
82 | 1979.0,1.0,5825.949,3760.2,819.556,525.524,4255.9,70.6,368.0,9.42,5.9,224.438,12.08,-2.66
83 | 1979.0,2.0,5831.418,3758.0,817.66,532.04,4226.1,73.0,377.2,9.3,5.7,225.055,13.37,-4.07
84 | 1979.0,3.0,5873.335,3794.9,801.742,531.232,4250.3,75.2,380.8,10.49,5.9,225.801,11.88,-1.38
85 | 1979.0,4.0,5889.495,3805.0,786.817,531.126,4284.3,78.0,385.8,11.94,5.9,226.451,14.62,-2.68
86 | 1980.0,1.0,5908.467,3798.4,781.114,548.115,4296.2,80.9,383.8,13.75,6.3,227.061,14.6,-0.85
87 | 1980.0,2.0,5787.373,3712.2,710.64,561.895,4236.1,82.6,394.0,7.9,7.3,227.726,8.32,-0.42
88 | 1980.0,3.0,5776.617,3752.0,656.477,554.292,4279.7,84.7,409.0,10.34,7.7,228.417,10.04,0.3
89 | 1980.0,4.0,5883.46,3802.0,723.22,556.13,4368.1,87.2,411.3,14.75,7.4,228.937,11.64,3.11
90 | 1981.0,1.0,6005.717,3822.8,795.091,567.618,4358.1,89.1,427.4,13.95,7.4,229.403,8.62,5.32
91 | 1981.0,2.0,5957.795,3822.8,757.24,584.54,4358.6,91.5,426.9,15.33,7.4,229.966,10.63,4.69
92 | 1981.0,3.0,6030.184,3838.3,804.242,583.89,4455.4,93.4,428.4,14.58,7.4,230.641,8.22,6.36
93 | 1981.0,4.0,5955.062,3809.3,773.053,590.125,4464.4,94.4,442.7,11.33,8.2,231.157,4.26,7.07
94 | 1982.0,1.0,5857.333,3833.9,692.514,591.043,4469.6,95.0,447.1,12.95,8.8,231.645,2.53,10.42
95 | 1982.0,2.0,5889.074,3847.7,691.9,596.403,4500.8,97.5,448.0,11.97,9.4,232.188,10.39,1.58
96 | 1982.0,3.0,5866.37,3877.2,683.825,605.37,4520.6,98.1,464.5,8.1,9.9,232.816,2.45,5.65
97 | 1982.0,4.0,5871.001,3947.9,622.93,623.307,4536.4,97.9,477.2,7.96,10.7,233.322,-0.82,8.77
98 | 1983.0,1.0,5944.02,3986.6,645.11,630.873,4572.2,98.8,493.2,8.22,10.4,233.781,3.66,4.56
99 | 1983.0,2.0,6077.619,4065.7,707.372,644.322,4605.5,99.8,507.8,8.69,10.1,234.307,4.03,4.66
100 | 1983.0,3.0,6197.468,4137.6,754.937,662.412,4674.7,100.8,517.2,8.99,9.4,234.907,3.99,5.01
101 | 1983.0,4.0,6325.574,4203.2,834.427,639.197,4771.1,102.1,525.1,8.89,8.5,235.385,5.13,3.76
102 | 1984.0,1.0,6448.264,4239.2,921.763,644.635,4875.4,103.3,535.0,9.43,7.9,235.839,4.67,4.76
103 | 1984.0,2.0,6559.594,4299.9,952.841,664.839,4959.4,104.1,540.9,9.94,7.5,236.348,3.09,6.85
104 | 1984.0,3.0,6623.343,4333.0,974.989,662.294,5036.6,105.1,543.7,10.19,7.4,236.976,3.82,6.37
105 | 1984.0,4.0,6677.264,4390.1,958.993,684.282,5084.5,105.7,557.0,8.14,7.3,237.468,2.28,5.87
106 | 1985.0,1.0,6740.275,4464.6,927.375,691.613,5072.0,107.0,570.4,8.25,7.3,237.9,4.89,3.36
107 | 1985.0,2.0,6797.344,4505.2,943.383,708.524,5172.7,107.7,589.1,7.17,7.3,238.466,2.61,4.56
108 | 1985.0,3.0,6903.523,4590.8,932.959,732.305,5140.7,108.5,607.8,7.13,7.2,239.113,2.96,4.17
109 | 1985.0,4.0,6955.918,4600.9,969.434,732.026,5193.9,109.9,621.4,7.14,7.0,239.638,5.13,2.01
110 | 1986.0,1.0,7022.757,4639.3,967.442,728.125,5255.8,108.7,641.0,6.56,7.0,240.094,-4.39,10.95
111 | 1986.0,2.0,7050.969,4688.7,945.972,751.334,5315.5,109.5,670.3,6.06,7.2,240.651,2.93,3.13
112 | 1986.0,3.0,7118.95,4770.7,916.315,779.77,5343.3,110.2,694.9,5.31,7.0,241.274,2.55,2.76
113 | 1986.0,4.0,7153.359,4799.4,917.736,767.671,5346.5,111.4,730.2,5.44,6.8,241.784,4.33,1.1
114 | 1987.0,1.0,7193.019,4792.1,945.776,772.247,5379.4,112.7,743.9,5.61,6.6,242.252,4.64,0.97
115 | 1987.0,2.0,7269.51,4856.3,947.1,782.962,5321.0,113.8,743.0,5.67,6.3,242.804,3.89,1.79
116 | 1987.0,3.0,7332.558,4910.4,948.055,783.804,5416.2,115.0,756.2,6.19,6.0,243.446,4.2,1.99
117 | 1987.0,4.0,7458.022,4922.2,1021.98,795.467,5493.1,116.0,756.2,5.76,5.9,243.981,3.46,2.29
118 | 1988.0,1.0,7496.6,5004.4,964.398,773.851,5562.1,117.2,768.1,5.76,5.7,244.445,4.12,1.64
119 | 1988.0,2.0,7592.881,5040.8,987.858,765.98,5614.3,118.5,781.4,6.48,5.5,245.021,4.41,2.07
120 | 1988.0,3.0,7632.082,5080.6,994.204,760.245,5657.5,119.9,783.3,7.22,5.5,245.693,4.7,2.52
121 | 1988.0,4.0,7733.991,5140.4,1007.371,783.065,5708.5,121.2,785.7,8.03,5.3,246.224,4.31,3.72
122 | 1989.0,1.0,7806.603,5159.3,1045.975,767.024,5773.4,123.1,779.2,8.67,5.2,246.721,6.22,2.44
123 | 1989.0,2.0,7865.016,5182.4,1033.753,784.275,5749.8,124.5,777.8,8.15,5.2,247.342,4.52,3.63
124 | 1989.0,3.0,7927.393,5236.1,1021.604,791.819,5787.0,125.4,786.6,7.76,5.3,248.067,2.88,4.88
125 | 1989.0,4.0,7944.697,5261.7,1011.119,787.844,5831.3,127.5,795.4,7.65,5.4,248.659,6.64,1.01
126 | 1990.0,1.0,8027.693,5303.3,1021.07,799.681,5875.1,128.9,806.2,7.8,5.3,249.306,4.37,3.44
127 | 1990.0,2.0,8059.598,5320.8,1021.36,800.639,5913.9,130.5,810.1,7.7,5.3,250.132,4.93,2.76
128 | 1990.0,3.0,8059.476,5341.0,997.319,793.513,5918.1,133.4,819.8,7.33,5.7,251.057,8.79,-1.46
129 | 1990.0,4.0,7988.864,5299.5,934.248,800.525,5878.2,134.7,827.2,6.67,6.1,251.889,3.88,2.79
130 | 1991.0,1.0,7950.164,5284.4,896.21,806.775,5896.3,135.1,843.2,5.83,6.6,252.643,1.19,4.65
131 | 1991.0,2.0,8003.822,5324.7,891.704,809.081,5941.1,136.2,861.5,5.54,6.8,253.493,3.24,2.29
132 | 1991.0,3.0,8037.538,5345.0,913.904,793.987,5953.6,137.2,878.0,5.18,6.9,254.435,2.93,2.25
133 | 1991.0,4.0,8069.046,5342.6,948.891,778.378,5992.4,138.3,910.4,4.14,7.1,255.214,3.19,0.95
134 | 1992.0,1.0,8157.616,5434.5,927.796,778.568,6082.9,139.4,943.8,3.88,7.4,255.992,3.17,0.71
135 | 1992.0,2.0,8244.294,5466.7,988.912,777.762,6129.5,140.5,963.2,3.5,7.6,256.894,3.14,0.36
136 | 1992.0,3.0,8329.361,5527.1,999.135,786.639,6160.6,141.7,1003.8,2.97,7.6,257.861,3.4,-0.44
137 | 1992.0,4.0,8417.016,5594.6,1030.758,787.064,6248.2,142.8,1030.4,3.12,7.4,258.679,3.09,0.02
138 | 1993.0,1.0,8432.485,5617.2,1054.979,762.901,6156.5,143.8,1047.6,2.92,7.2,259.414,2.79,0.13
139 | 1993.0,2.0,8486.435,5671.1,1063.263,752.158,6252.3,144.5,1084.5,3.02,7.1,260.255,1.94,1.08
140 | 1993.0,3.0,8531.108,5732.7,1062.514,744.227,6265.7,145.6,1113.0,3.0,6.8,261.163,3.03,-0.04
141 | 1993.0,4.0,8643.769,5783.7,1118.583,748.102,6358.1,146.3,1131.6,3.05,6.6,261.919,1.92,1.13
142 | 1994.0,1.0,8727.919,5848.1,1166.845,721.288,6332.6,147.2,1141.1,3.48,6.6,262.631,2.45,1.02
143 | 1994.0,2.0,8847.303,5891.5,1234.855,717.197,6440.6,148.4,1150.5,4.2,6.2,263.436,3.25,0.96
144 | 1994.0,3.0,8904.289,5938.7,1212.655,736.89,6487.9,149.4,1150.1,4.68,6.0,264.301,2.69,2.0
145 | 1994.0,4.0,9003.18,5997.3,1269.19,716.702,6574.0,150.5,1151.4,5.53,5.6,265.044,2.93,2.6
146 | 1995.0,1.0,9025.267,6004.3,1282.09,715.326,6616.6,151.8,1149.3,5.72,5.5,265.755,3.44,2.28
147 | 1995.0,2.0,9044.668,6053.5,1247.61,712.492,6617.2,152.6,1145.4,5.52,5.7,266.557,2.1,3.42
148 | 1995.0,3.0,9120.684,6107.6,1235.601,707.649,6666.8,153.5,1137.3,5.32,5.7,267.456,2.35,2.97
149 | 1995.0,4.0,9184.275,6150.6,1270.392,681.081,6706.2,154.7,1123.5,5.17,5.6,268.151,3.11,2.05
150 | 1996.0,1.0,9247.188,6206.9,1287.128,695.265,6777.7,156.1,1124.8,4.91,5.5,268.853,3.6,1.31
151 | 1996.0,2.0,9407.052,6277.1,1353.795,705.172,6850.6,157.0,1112.4,5.09,5.5,269.667,2.3,2.79
152 | 1996.0,3.0,9488.879,6314.6,1422.059,692.741,6908.9,158.2,1086.1,5.04,5.3,270.581,3.05,2.0
153 | 1996.0,4.0,9592.458,6366.1,1418.193,690.744,6946.8,159.4,1081.5,4.99,5.3,271.36,3.02,1.97
154 | 1997.0,1.0,9666.235,6430.2,1451.304,681.445,7008.9,159.9,1063.8,5.1,5.2,272.083,1.25,3.85
155 | 1997.0,2.0,9809.551,6456.2,1543.976,693.525,7061.5,160.4,1066.2,5.01,5.0,272.912,1.25,3.76
156 | 1997.0,3.0,9932.672,6566.0,1571.426,691.261,7142.4,161.5,1065.5,5.02,4.9,273.852,2.73,2.29
157 | 1997.0,4.0,10008.874,6641.1,1596.523,690.311,7241.5,162.0,1074.4,5.11,4.7,274.626,1.24,3.88
158 | 1998.0,1.0,10103.425,6707.2,1672.732,668.783,7406.2,162.2,1076.1,5.02,4.6,275.304,0.49,4.53
159 | 1998.0,2.0,10194.277,6822.6,1652.716,687.184,7512.0,163.2,1075.0,4.98,4.4,276.115,2.46,2.52
160 | 1998.0,3.0,10328.787,6913.1,1700.071,681.472,7591.0,163.9,1086.0,4.49,4.5,277.003,1.71,2.78
161 | 1998.0,4.0,10507.575,7019.1,1754.743,688.147,7646.5,164.7,1097.8,4.38,4.4,277.79,1.95,2.43
162 | 1999.0,1.0,10601.179,7088.3,1809.993,683.601,7698.4,165.9,1101.9,4.39,4.3,278.451,2.9,1.49
163 | 1999.0,2.0,10684.049,7199.9,1803.674,683.594,7716.0,166.7,1098.7,4.54,4.3,279.295,1.92,2.62
164 | 1999.0,3.0,10819.914,7286.4,1848.949,697.936,7765.9,168.1,1102.3,4.75,4.2,280.203,3.35,1.41
165 | 1999.0,4.0,11014.254,7389.2,1914.567,713.445,7887.7,169.3,1121.9,5.2,4.1,280.976,2.85,2.35
166 | 2000.0,1.0,11043.044,7501.3,1887.836,685.216,8053.4,170.9,1113.5,5.63,4.0,281.653,3.76,1.87
167 | 2000.0,2.0,11258.454,7571.8,2018.529,712.641,8135.9,172.7,1103.0,5.81,3.9,282.385,4.19,1.62
168 | 2000.0,3.0,11267.867,7645.9,1986.956,698.827,8222.3,173.9,1098.7,6.07,4.0,283.19,2.77,3.3
169 | 2000.0,4.0,11334.544,7713.5,1987.845,695.597,8234.6,175.6,1097.7,5.7,3.9,283.9,3.89,1.81
170 | 2001.0,1.0,11297.171,7744.3,1882.691,710.403,8296.5,176.4,1114.9,4.39,4.2,284.55,1.82,2.57
171 | 2001.0,2.0,11371.251,7773.5,1876.65,725.623,8273.7,177.4,1139.7,3.54,4.4,285.267,2.26,1.28
172 | 2001.0,3.0,11340.075,7807.7,1837.074,730.493,8484.5,177.6,1166.0,2.72,4.8,286.047,0.45,2.27
173 | 2001.0,4.0,11380.128,7930.0,1731.189,739.318,8385.5,177.7,1190.9,1.74,5.5,286.728,0.23,1.51
174 | 2002.0,1.0,11477.868,7957.3,1789.327,756.915,8611.6,179.3,1185.9,1.75,5.7,287.328,3.59,-1.84
175 | 2002.0,2.0,11538.77,7997.8,1810.779,774.408,8658.9,180.0,1199.5,1.7,5.8,288.028,1.56,0.14
176 | 2002.0,3.0,11596.43,8052.0,1814.531,786.673,8629.2,181.2,1204.0,1.61,5.7,288.783,2.66,-1.05
177 | 2002.0,4.0,11598.824,8080.6,1813.219,799.967,8649.6,182.6,1226.8,1.2,5.8,289.421,3.08,-1.88
178 | 2003.0,1.0,11645.819,8122.3,1813.141,800.196,8681.3,183.2,1248.4,1.14,5.9,290.019,1.31,-0.17
179 | 2003.0,2.0,11738.706,8197.8,1823.698,838.775,8812.5,183.7,1287.9,0.96,6.2,290.704,1.09,-0.13
180 | 2003.0,3.0,11935.461,8312.1,1889.883,839.598,8935.4,184.9,1297.3,0.94,6.1,291.449,2.6,-1.67
181 | 2003.0,4.0,12042.817,8358.0,1959.783,845.722,8986.4,186.3,1306.1,0.9,5.8,292.057,3.02,-2.11
182 | 2004.0,1.0,12127.623,8437.6,1970.015,856.57,9025.9,187.4,1332.1,0.94,5.7,292.635,2.35,-1.42
183 | 2004.0,2.0,12213.818,8483.2,2055.58,861.44,9115.0,189.1,1340.5,1.21,5.6,293.31,3.61,-2.41
184 | 2004.0,3.0,12303.533,8555.8,2082.231,876.385,9175.9,190.8,1361.0,1.63,5.4,294.066,3.58,-1.95
185 | 2004.0,4.0,12410.282,8654.2,2125.152,865.596,9303.4,191.8,1366.6,2.2,5.4,294.741,2.09,0.11
186 | 2005.0,1.0,12534.113,8719.0,2170.299,869.204,9189.6,193.8,1357.8,2.69,5.3,295.308,4.15,-1.46
187 | 2005.0,2.0,12587.535,8802.9,2131.468,870.044,9253.0,194.7,1366.6,3.01,5.1,295.994,1.85,1.16
188 | 2005.0,3.0,12683.153,8865.6,2154.949,890.394,9308.0,199.2,1375.0,3.52,5.0,296.77,9.14,-5.62
189 | 2005.0,4.0,12748.699,8888.5,2232.193,875.557,9358.7,199.4,1380.6,4.0,4.9,297.435,0.4,3.6
190 | 2006.0,1.0,12915.938,8986.6,2264.721,900.511,9533.8,200.7,1380.5,4.51,4.7,298.061,2.6,1.91
191 | 2006.0,2.0,12962.462,9035.0,2261.247,892.839,9617.3,202.7,1369.2,4.82,4.7,298.766,3.97,0.85
192 | 2006.0,3.0,12965.916,9090.7,2229.636,892.002,9662.5,201.9,1369.4,4.9,4.7,299.593,-1.58,6.48
193 | 2006.0,4.0,13060.679,9181.6,2165.966,894.404,9788.8,203.574,1373.6,4.92,4.4,300.32,3.3,1.62
194 | 2007.0,1.0,13099.901,9265.1,2132.609,882.766,9830.2,205.92,1379.7,4.95,4.5,300.977,4.58,0.36
195 | 2007.0,2.0,13203.977,9291.5,2162.214,898.713,9842.7,207.338,1370.0,4.72,4.5,301.714,2.75,1.97
196 | 2007.0,3.0,13321.109,9335.6,2166.491,918.983,9883.9,209.133,1379.2,4.0,4.7,302.509,3.45,0.55
197 | 2007.0,4.0,13391.249,9363.6,2123.426,925.11,9886.2,212.495,1377.4,3.01,4.8,303.204,6.38,-3.37
198 | 2008.0,1.0,13366.865,9349.6,2082.886,943.372,9826.8,213.997,1384.0,1.56,4.9,303.803,2.82,-1.26
199 | 2008.0,2.0,13415.266,9351.0,2026.518,961.28,10059.0,218.61,1409.3,1.74,5.4,304.483,8.53,-6.79
200 | 2008.0,3.0,13324.6,9267.7,1990.693,991.551,9838.3,216.889,1474.7,1.17,6.0,305.27,-3.16,4.33
201 | 2008.0,4.0,13141.92,9195.3,1857.661,1007.273,9920.4,212.174,1576.5,0.12,6.9,305.952,-8.79,8.91
202 | 2009.0,1.0,12925.41,9209.2,1558.494,996.287,9926.4,212.671,1592.8,0.22,8.1,306.547,0.94,-0.71
203 | 2009.0,2.0,12901.504,9189.0,1456.678,1023.528,10077.5,214.469,1653.6,0.18,9.2,307.226,3.37,-3.19
204 | 2009.0,3.0,12990.341,9256.0,1486.398,1044.088,10040.6,216.385,1673.9,0.12,9.6,308.013,3.56,-3.44
205 |
--------------------------------------------------------------------------------
/data/tips.csv:
--------------------------------------------------------------------------------
1 | total_bill,tip,sex,smoker,day,time,size
2 | 16.99,1.01,Female,No,Sun,Dinner,2
3 | 10.34,1.66,Male,No,Sun,Dinner,3
4 | 21.01,3.5,Male,No,Sun,Dinner,3
5 | 23.68,3.31,Male,No,Sun,Dinner,2
6 | 24.59,3.61,Female,No,Sun,Dinner,4
7 | 25.29,4.71,Male,No,Sun,Dinner,4
8 | 8.77,2.0,Male,No,Sun,Dinner,2
9 | 26.88,3.12,Male,No,Sun,Dinner,4
10 | 15.04,1.96,Male,No,Sun,Dinner,2
11 | 14.78,3.23,Male,No,Sun,Dinner,2
12 | 10.27,1.71,Male,No,Sun,Dinner,2
13 | 35.26,5.0,Female,No,Sun,Dinner,4
14 | 15.42,1.57,Male,No,Sun,Dinner,2
15 | 18.43,3.0,Male,No,Sun,Dinner,4
16 | 14.83,3.02,Female,No,Sun,Dinner,2
17 | 21.58,3.92,Male,No,Sun,Dinner,2
18 | 10.33,1.67,Female,No,Sun,Dinner,3
19 | 16.29,3.71,Male,No,Sun,Dinner,3
20 | 16.97,3.5,Female,No,Sun,Dinner,3
21 | 20.65,3.35,Male,No,Sat,Dinner,3
22 | 17.92,4.08,Male,No,Sat,Dinner,2
23 | 20.29,2.75,Female,No,Sat,Dinner,2
24 | 15.77,2.23,Female,No,Sat,Dinner,2
25 | 39.42,7.58,Male,No,Sat,Dinner,4
26 | 19.82,3.18,Male,No,Sat,Dinner,2
27 | 17.81,2.34,Male,No,Sat,Dinner,4
28 | 13.37,2.0,Male,No,Sat,Dinner,2
29 | 12.69,2.0,Male,No,Sat,Dinner,2
30 | 21.7,4.3,Male,No,Sat,Dinner,2
31 | 19.65,3.0,Female,No,Sat,Dinner,2
32 | 9.55,1.45,Male,No,Sat,Dinner,2
33 | 18.35,2.5,Male,No,Sat,Dinner,4
34 | 15.06,3.0,Female,No,Sat,Dinner,2
35 | 20.69,2.45,Female,No,Sat,Dinner,4
36 | 17.78,3.27,Male,No,Sat,Dinner,2
37 | 24.06,3.6,Male,No,Sat,Dinner,3
38 | 16.31,2.0,Male,No,Sat,Dinner,3
39 | 16.93,3.07,Female,No,Sat,Dinner,3
40 | 18.69,2.31,Male,No,Sat,Dinner,3
41 | 31.27,5.0,Male,No,Sat,Dinner,3
42 | 16.04,2.24,Male,No,Sat,Dinner,3
43 | 17.46,2.54,Male,No,Sun,Dinner,2
44 | 13.94,3.06,Male,No,Sun,Dinner,2
45 | 9.68,1.32,Male,No,Sun,Dinner,2
46 | 30.4,5.6,Male,No,Sun,Dinner,4
47 | 18.29,3.0,Male,No,Sun,Dinner,2
48 | 22.23,5.0,Male,No,Sun,Dinner,2
49 | 32.4,6.0,Male,No,Sun,Dinner,4
50 | 28.55,2.05,Male,No,Sun,Dinner,3
51 | 18.04,3.0,Male,No,Sun,Dinner,2
52 | 12.54,2.5,Male,No,Sun,Dinner,2
53 | 10.29,2.6,Female,No,Sun,Dinner,2
54 | 34.81,5.2,Female,No,Sun,Dinner,4
55 | 9.94,1.56,Male,No,Sun,Dinner,2
56 | 25.56,4.34,Male,No,Sun,Dinner,4
57 | 19.49,3.51,Male,No,Sun,Dinner,2
58 | 38.01,3.0,Male,Yes,Sat,Dinner,4
59 | 26.41,1.5,Female,No,Sat,Dinner,2
60 | 11.24,1.76,Male,Yes,Sat,Dinner,2
61 | 48.27,6.73,Male,No,Sat,Dinner,4
62 | 20.29,3.21,Male,Yes,Sat,Dinner,2
63 | 13.81,2.0,Male,Yes,Sat,Dinner,2
64 | 11.02,1.98,Male,Yes,Sat,Dinner,2
65 | 18.29,3.76,Male,Yes,Sat,Dinner,4
66 | 17.59,2.64,Male,No,Sat,Dinner,3
67 | 20.08,3.15,Male,No,Sat,Dinner,3
68 | 16.45,2.47,Female,No,Sat,Dinner,2
69 | 3.07,1.0,Female,Yes,Sat,Dinner,1
70 | 20.23,2.01,Male,No,Sat,Dinner,2
71 | 15.01,2.09,Male,Yes,Sat,Dinner,2
72 | 12.02,1.97,Male,No,Sat,Dinner,2
73 | 17.07,3.0,Female,No,Sat,Dinner,3
74 | 26.86,3.14,Female,Yes,Sat,Dinner,2
75 | 25.28,5.0,Female,Yes,Sat,Dinner,2
76 | 14.73,2.2,Female,No,Sat,Dinner,2
77 | 10.51,1.25,Male,No,Sat,Dinner,2
78 | 17.92,3.08,Male,Yes,Sat,Dinner,2
79 | 27.2,4.0,Male,No,Thur,Lunch,4
80 | 22.76,3.0,Male,No,Thur,Lunch,2
81 | 17.29,2.71,Male,No,Thur,Lunch,2
82 | 19.44,3.0,Male,Yes,Thur,Lunch,2
83 | 16.66,3.4,Male,No,Thur,Lunch,2
84 | 10.07,1.83,Female,No,Thur,Lunch,1
85 | 32.68,5.0,Male,Yes,Thur,Lunch,2
86 | 15.98,2.03,Male,No,Thur,Lunch,2
87 | 34.83,5.17,Female,No,Thur,Lunch,4
88 | 13.03,2.0,Male,No,Thur,Lunch,2
89 | 18.28,4.0,Male,No,Thur,Lunch,2
90 | 24.71,5.85,Male,No,Thur,Lunch,2
91 | 21.16,3.0,Male,No,Thur,Lunch,2
92 | 28.97,3.0,Male,Yes,Fri,Dinner,2
93 | 22.49,3.5,Male,No,Fri,Dinner,2
94 | 5.75,1.0,Female,Yes,Fri,Dinner,2
95 | 16.32,4.3,Female,Yes,Fri,Dinner,2
96 | 22.75,3.25,Female,No,Fri,Dinner,2
97 | 40.17,4.73,Male,Yes,Fri,Dinner,4
98 | 27.28,4.0,Male,Yes,Fri,Dinner,2
99 | 12.03,1.5,Male,Yes,Fri,Dinner,2
100 | 21.01,3.0,Male,Yes,Fri,Dinner,2
101 | 12.46,1.5,Male,No,Fri,Dinner,2
102 | 11.35,2.5,Female,Yes,Fri,Dinner,2
103 | 15.38,3.0,Female,Yes,Fri,Dinner,2
104 | 44.3,2.5,Female,Yes,Sat,Dinner,3
105 | 22.42,3.48,Female,Yes,Sat,Dinner,2
106 | 20.92,4.08,Female,No,Sat,Dinner,2
107 | 15.36,1.64,Male,Yes,Sat,Dinner,2
108 | 20.49,4.06,Male,Yes,Sat,Dinner,2
109 | 25.21,4.29,Male,Yes,Sat,Dinner,2
110 | 18.24,3.76,Male,No,Sat,Dinner,2
111 | 14.31,4.0,Female,Yes,Sat,Dinner,2
112 | 14.0,3.0,Male,No,Sat,Dinner,2
113 | 7.25,1.0,Female,No,Sat,Dinner,1
114 | 38.07,4.0,Male,No,Sun,Dinner,3
115 | 23.95,2.55,Male,No,Sun,Dinner,2
116 | 25.71,4.0,Female,No,Sun,Dinner,3
117 | 17.31,3.5,Female,No,Sun,Dinner,2
118 | 29.93,5.07,Male,No,Sun,Dinner,4
119 | 10.65,1.5,Female,No,Thur,Lunch,2
120 | 12.43,1.8,Female,No,Thur,Lunch,2
121 | 24.08,2.92,Female,No,Thur,Lunch,4
122 | 11.69,2.31,Male,No,Thur,Lunch,2
123 | 13.42,1.68,Female,No,Thur,Lunch,2
124 | 14.26,2.5,Male,No,Thur,Lunch,2
125 | 15.95,2.0,Male,No,Thur,Lunch,2
126 | 12.48,2.52,Female,No,Thur,Lunch,2
127 | 29.8,4.2,Female,No,Thur,Lunch,6
128 | 8.52,1.48,Male,No,Thur,Lunch,2
129 | 14.52,2.0,Female,No,Thur,Lunch,2
130 | 11.38,2.0,Female,No,Thur,Lunch,2
131 | 22.82,2.18,Male,No,Thur,Lunch,3
132 | 19.08,1.5,Male,No,Thur,Lunch,2
133 | 20.27,2.83,Female,No,Thur,Lunch,2
134 | 11.17,1.5,Female,No,Thur,Lunch,2
135 | 12.26,2.0,Female,No,Thur,Lunch,2
136 | 18.26,3.25,Female,No,Thur,Lunch,2
137 | 8.51,1.25,Female,No,Thur,Lunch,2
138 | 10.33,2.0,Female,No,Thur,Lunch,2
139 | 14.15,2.0,Female,No,Thur,Lunch,2
140 | 16.0,2.0,Male,Yes,Thur,Lunch,2
141 | 13.16,2.75,Female,No,Thur,Lunch,2
142 | 17.47,3.5,Female,No,Thur,Lunch,2
143 | 34.3,6.7,Male,No,Thur,Lunch,6
144 | 41.19,5.0,Male,No,Thur,Lunch,5
145 | 27.05,5.0,Female,No,Thur,Lunch,6
146 | 16.43,2.3,Female,No,Thur,Lunch,2
147 | 8.35,1.5,Female,No,Thur,Lunch,2
148 | 18.64,1.36,Female,No,Thur,Lunch,3
149 | 11.87,1.63,Female,No,Thur,Lunch,2
150 | 9.78,1.73,Male,No,Thur,Lunch,2
151 | 7.51,2.0,Male,No,Thur,Lunch,2
152 | 14.07,2.5,Male,No,Sun,Dinner,2
153 | 13.13,2.0,Male,No,Sun,Dinner,2
154 | 17.26,2.74,Male,No,Sun,Dinner,3
155 | 24.55,2.0,Male,No,Sun,Dinner,4
156 | 19.77,2.0,Male,No,Sun,Dinner,4
157 | 29.85,5.14,Female,No,Sun,Dinner,5
158 | 48.17,5.0,Male,No,Sun,Dinner,6
159 | 25.0,3.75,Female,No,Sun,Dinner,4
160 | 13.39,2.61,Female,No,Sun,Dinner,2
161 | 16.49,2.0,Male,No,Sun,Dinner,4
162 | 21.5,3.5,Male,No,Sun,Dinner,4
163 | 12.66,2.5,Male,No,Sun,Dinner,2
164 | 16.21,2.0,Female,No,Sun,Dinner,3
165 | 13.81,2.0,Male,No,Sun,Dinner,2
166 | 17.51,3.0,Female,Yes,Sun,Dinner,2
167 | 24.52,3.48,Male,No,Sun,Dinner,3
168 | 20.76,2.24,Male,No,Sun,Dinner,2
169 | 31.71,4.5,Male,No,Sun,Dinner,4
170 | 10.59,1.61,Female,Yes,Sat,Dinner,2
171 | 10.63,2.0,Female,Yes,Sat,Dinner,2
172 | 50.81,10.0,Male,Yes,Sat,Dinner,3
173 | 15.81,3.16,Male,Yes,Sat,Dinner,2
174 | 7.25,5.15,Male,Yes,Sun,Dinner,2
175 | 31.85,3.18,Male,Yes,Sun,Dinner,2
176 | 16.82,4.0,Male,Yes,Sun,Dinner,2
177 | 32.9,3.11,Male,Yes,Sun,Dinner,2
178 | 17.89,2.0,Male,Yes,Sun,Dinner,2
179 | 14.48,2.0,Male,Yes,Sun,Dinner,2
180 | 9.6,4.0,Female,Yes,Sun,Dinner,2
181 | 34.63,3.55,Male,Yes,Sun,Dinner,2
182 | 34.65,3.68,Male,Yes,Sun,Dinner,4
183 | 23.33,5.65,Male,Yes,Sun,Dinner,2
184 | 45.35,3.5,Male,Yes,Sun,Dinner,3
185 | 23.17,6.5,Male,Yes,Sun,Dinner,4
186 | 40.55,3.0,Male,Yes,Sun,Dinner,2
187 | 20.69,5.0,Male,No,Sun,Dinner,5
188 | 20.9,3.5,Female,Yes,Sun,Dinner,3
189 | 30.46,2.0,Male,Yes,Sun,Dinner,5
190 | 18.15,3.5,Female,Yes,Sun,Dinner,3
191 | 23.1,4.0,Male,Yes,Sun,Dinner,3
192 | 15.69,1.5,Male,Yes,Sun,Dinner,2
193 | 19.81,4.19,Female,Yes,Thur,Lunch,2
194 | 28.44,2.56,Male,Yes,Thur,Lunch,2
195 | 15.48,2.02,Male,Yes,Thur,Lunch,2
196 | 16.58,4.0,Male,Yes,Thur,Lunch,2
197 | 7.56,1.44,Male,No,Thur,Lunch,2
198 | 10.34,2.0,Male,Yes,Thur,Lunch,2
199 | 43.11,5.0,Female,Yes,Thur,Lunch,4
200 | 13.0,2.0,Female,Yes,Thur,Lunch,2
201 | 13.51,2.0,Male,Yes,Thur,Lunch,2
202 | 18.71,4.0,Male,Yes,Thur,Lunch,3
203 | 12.74,2.01,Female,Yes,Thur,Lunch,2
204 | 13.0,2.0,Female,Yes,Thur,Lunch,2
205 | 16.4,2.5,Female,Yes,Thur,Lunch,2
206 | 20.53,4.0,Male,Yes,Thur,Lunch,4
207 | 16.47,3.23,Female,Yes,Thur,Lunch,3
208 | 26.59,3.41,Male,Yes,Sat,Dinner,3
209 | 38.73,3.0,Male,Yes,Sat,Dinner,4
210 | 24.27,2.03,Male,Yes,Sat,Dinner,2
211 | 12.76,2.23,Female,Yes,Sat,Dinner,2
212 | 30.06,2.0,Male,Yes,Sat,Dinner,3
213 | 25.89,5.16,Male,Yes,Sat,Dinner,4
214 | 48.33,9.0,Male,No,Sat,Dinner,4
215 | 13.27,2.5,Female,Yes,Sat,Dinner,2
216 | 28.17,6.5,Female,Yes,Sat,Dinner,3
217 | 12.9,1.1,Female,Yes,Sat,Dinner,2
218 | 28.15,3.0,Male,Yes,Sat,Dinner,5
219 | 11.59,1.5,Male,Yes,Sat,Dinner,2
220 | 7.74,1.44,Male,Yes,Sat,Dinner,2
221 | 30.14,3.09,Female,Yes,Sat,Dinner,4
222 | 12.16,2.2,Male,Yes,Fri,Lunch,2
223 | 13.42,3.48,Female,Yes,Fri,Lunch,2
224 | 8.58,1.92,Male,Yes,Fri,Lunch,1
225 | 15.98,3.0,Female,No,Fri,Lunch,3
226 | 13.42,1.58,Male,Yes,Fri,Lunch,2
227 | 16.27,2.5,Female,Yes,Fri,Lunch,2
228 | 10.09,2.0,Female,Yes,Fri,Lunch,2
229 | 20.45,3.0,Male,No,Sat,Dinner,4
230 | 13.28,2.72,Male,No,Sat,Dinner,2
231 | 22.12,2.88,Female,Yes,Sat,Dinner,2
232 | 24.01,2.0,Male,Yes,Sat,Dinner,4
233 | 15.69,3.0,Male,Yes,Sat,Dinner,3
234 | 11.61,3.39,Male,No,Sat,Dinner,2
235 | 10.77,1.47,Male,No,Sat,Dinner,2
236 | 15.53,3.0,Male,Yes,Sat,Dinner,2
237 | 10.07,1.25,Male,No,Sat,Dinner,2
238 | 12.6,1.0,Male,Yes,Sat,Dinner,2
239 | 32.83,1.17,Male,Yes,Sat,Dinner,2
240 | 35.83,4.67,Female,No,Sat,Dinner,3
241 | 29.03,5.92,Male,No,Sat,Dinner,3
242 | 27.18,2.0,Female,Yes,Sat,Dinner,2
243 | 22.67,2.0,Male,Yes,Sat,Dinner,2
244 | 17.82,1.75,Male,No,Sat,Dinner,2
245 | 18.78,3.0,Female,No,Thur,Dinner,2
246 |
--------------------------------------------------------------------------------
/data/個股_類別.rar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wei1234c/Introduction_to_Pandas/af523d3dc81a1bf49a03740781b1a918ace2cfc9/data/個股_類別.rar
--------------------------------------------------------------------------------
/jpgs/MyPicture1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wei1234c/Introduction_to_Pandas/af523d3dc81a1bf49a03740781b1a918ace2cfc9/jpgs/MyPicture1.jpg
--------------------------------------------------------------------------------
/notebooks/0. Pandas入門介紹.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "slideshow": {
7 | "slide_type": "slide"
8 | }
9 | },
10 | "source": [
11 | "# Pandas 入門介紹\n",
12 | "[Taichung.py](http://www.meetup.com/Taichung-Python-Meetup/) \n",
13 | "2016/3/12\n",
14 | "Wei Lin \n",
15 | "[Wei1234c@gmail.com](mailto:wei1234c@gmail.com) "
16 | ]
17 | },
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {
21 | "slideshow": {
22 | "slide_type": "slide"
23 | }
24 | },
25 | "source": [
26 | "## Books: \n",
27 | "[Python for Data Analysis](http://www.books.com.tw/products/F012771443) \n",
28 | " "
29 | ]
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {
34 | "collapsed": true,
35 | "slideshow": {
36 | "slide_type": "slide"
37 | }
38 | },
39 | "source": [
40 | "## Videos: \n",
41 | "[Data analysis in Python with pandas - Wes McKinney (Pandas之父)](https://www.youtube.com/watch?v=w26x-z-BdWQ) \n",
42 | "[Analyzing data with Pandas - PyCon SE 2015](https://www.youtube.com/watch?v=kSM8S76qYz0) \n",
43 | "[Pandas From The Ground Up - PyCon 2015](https://www.youtube.com/watch?v=5JnMutdy6Fw) / [Brandon’s Pandas Tutorial](https://github.com/brandon-rhodes/pycon-pandas-tutorial) \n",
44 | "[Hands-on Data Analysis with Python - PyCon 2015](https://www.youtube.com/watch?v=L4Hbv4ugUWk&list=PLHJdMADCPuGQeXXvSJnXGNhvYoOwcXjUD&index=4) "
45 | ]
46 | },
47 | {
48 | "cell_type": "markdown",
49 | "metadata": {
50 | "slideshow": {
51 | "slide_type": "slide"
52 | }
53 | },
54 | "source": [
55 | "## Documentation:\n",
56 | "[pandas documentation — API Reference](http://pandas.pydata.org/pandas-docs/stable/api.html) "
57 | ]
58 | },
59 | {
60 | "cell_type": "markdown",
61 | "metadata": {
62 | "slideshow": {
63 | "slide_type": "slide"
64 | }
65 | },
66 | "source": [
67 | "## About Me\n",
68 | "- Wei Lin : \n",
69 | " - [Wei1234c@gmail.com](mailto:Wei1234c@gmail.com)\n",
70 | " - [Twitter: @Wei_1144](https://twitter.com/Wei_1144)\n",
71 | " - [facebook: Wei Lin](https://www.facebook.com/wei.lin.921025)\n",
72 | " - [Github: wei1234c](https://github.com/Wei1234c)\n",
73 | "- A planner in private enterprises\n",
74 | "- Started learning Python in 2015\n",
75 | "- Interested in Data-Science and A.I.\n",
76 | "- [Speaker of PyCon TW 2016 - 2016/6/3](https://tw.pycon.org/2016/zh-hant/events/talk/68823578639859763/) \n",
77 | "- [Speaker of PyCon JP 2016 - 2016/9/22](https://pycon.jp/2016/en/schedule/presentation/5/)"
78 | ]
79 | },
80 | {
81 | "cell_type": "markdown",
82 | "metadata": {
83 | "slideshow": {
84 | "slide_type": "slide"
85 | }
86 | },
87 | "source": [
88 | "## About this talk\n",
89 | "1. Pandas - main classes and structure\n",
90 | "2. Pandas - I/O tools\n",
91 | "3. Pandas - ETL tools\n",
92 | "4. Pandas - plotting\n",
93 | "5. Pandas - GroupBy"
94 | ]
95 | },
96 | {
97 | "cell_type": "markdown",
98 | "metadata": {
99 | "slideshow": {
100 | "slide_type": "slide"
101 | }
102 | },
103 | "source": [
104 | "## Design of this talk\n",
105 | "1. Theory and Practice\n",
106 | "2. Learning a language\n",
107 | "3. What if ..."
108 | ]
109 | },
110 | {
111 | "cell_type": "markdown",
112 | "metadata": {
113 | "slideshow": {
114 | "slide_type": "slide"
115 | }
116 | },
117 | "source": [
118 | "## [IPython Notebook](http://u.camdemy.com/media/399) 使用技巧簡介\n",
119 | "- Help\n",
120 | "- Insert a Cell\n",
121 | "- Menu / Document"
122 | ]
123 | }
124 | ],
125 | "metadata": {
126 | "anaconda-cloud": {},
127 | "kernelspec": {
128 | "display_name": "Python [default]",
129 | "language": "python",
130 | "name": "python3"
131 | },
132 | "language_info": {
133 | "codemirror_mode": {
134 | "name": "ipython",
135 | "version": 3
136 | },
137 | "file_extension": ".py",
138 | "mimetype": "text/x-python",
139 | "name": "python",
140 | "nbconvert_exporter": "python",
141 | "pygments_lexer": "ipython3",
142 | "version": "3.5.1"
143 | }
144 | },
145 | "nbformat": 4,
146 | "nbformat_minor": 0
147 | }
148 |
--------------------------------------------------------------------------------
/notebooks/2. Pandas - IO tools/2. Pandas - IO tools.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 2. Pandas - IO tools"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 2,
13 | "metadata": {
14 | "collapsed": false
15 | },
16 | "outputs": [
17 | {
18 | "name": "stdout",
19 | "output_type": "stream",
20 | "text": [
21 | "Using matplotlib backend: Qt4Agg\n",
22 | "Populating the interactive namespace from numpy and matplotlib\n"
23 | ]
24 | }
25 | ],
26 | "source": [
27 | "%pylab\n",
28 | "from pandas import Series, DataFrame\n",
29 | "import pandas as pd"
30 | ]
31 | },
32 | {
33 | "cell_type": "markdown",
34 | "metadata": {},
35 | "source": [
36 | "## 讀寫文本格式的數據"
37 | ]
38 | },
39 | {
40 | "cell_type": "markdown",
41 | "metadata": {
42 | "collapsed": true
43 | },
44 | "source": [
45 | "將text轉換為DataFrame的函數,其選項分為:\n",
46 | "- 索引\n",
47 | "- 類型推斷 和 數據轉換\n",
48 | "- 日期解析\n",
49 | "- 迭代\n",
50 | "- 不規整數據問題"
51 | ]
52 | },
53 | {
54 | "cell_type": "markdown",
55 | "metadata": {},
56 | "source": [
57 | "類型推斷(type inference)是最重要的功能之一,不需要指定各欄位的資料型態"
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": 3,
63 | "metadata": {
64 | "collapsed": false
65 | },
66 | "outputs": [
67 | {
68 | "name": "stdout",
69 | "output_type": "stream",
70 | "text": [
71 | "a,b,c,d,message\n",
72 | "1,2,3,4,hello\n",
73 | "5,6,7,8,world\n",
74 | "9,10,11,12,foo\n"
75 | ]
76 | }
77 | ],
78 | "source": [
79 | "!cat ex1.csv"
80 | ]
81 | },
82 | {
83 | "cell_type": "code",
84 | "execution_count": 4,
85 | "metadata": {
86 | "collapsed": false
87 | },
88 | "outputs": [
89 | {
90 | "name": "stdout",
91 | "output_type": "stream",
92 | "text": [
93 | "a,b,c,d,message\n",
94 | "1,2,3,4,hello\n",
95 | "5,6,7,8,world\n",
96 | "9,10,11,12,foo\n"
97 | ]
98 | }
99 | ],
100 | "source": [
101 | "!type ex1.csv"
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "execution_count": 5,
107 | "metadata": {
108 | "collapsed": false
109 | },
110 | "outputs": [
111 | {
112 | "data": {
113 | "text/html": [
114 | "\n",
115 | "
\n",
116 | " \n",
117 | " \n",
118 | " | \n",
119 | " a | \n",
120 | " b | \n",
121 | " c | \n",
122 | " d | \n",
123 | " message | \n",
124 | "
\n",
125 | " \n",
126 | " \n",
127 | " \n",
128 | " 0 | \n",
129 | " 1 | \n",
130 | " 2 | \n",
131 | " 3 | \n",
132 | " 4 | \n",
133 | " hello | \n",
134 | "
\n",
135 | " \n",
136 | " 1 | \n",
137 | " 5 | \n",
138 | " 6 | \n",
139 | " 7 | \n",
140 | " 8 | \n",
141 | " world | \n",
142 | "
\n",
143 | " \n",
144 | " 2 | \n",
145 | " 9 | \n",
146 | " 10 | \n",
147 | " 11 | \n",
148 | " 12 | \n",
149 | " foo | \n",
150 | "
\n",
151 | " \n",
152 | "
\n",
153 | "
"
154 | ],
155 | "text/plain": [
156 | " a b c d message\n",
157 | "0 1 2 3 4 hello\n",
158 | "1 5 6 7 8 world\n",
159 | "2 9 10 11 12 foo"
160 | ]
161 | },
162 | "execution_count": 5,
163 | "metadata": {},
164 | "output_type": "execute_result"
165 | }
166 | ],
167 | "source": [
168 | "# read_csv 讀入 csv檔案\n",
169 | "df = pd.read_csv('ex1.csv')\n",
170 | "df"
171 | ]
172 | },
173 | {
174 | "cell_type": "code",
175 | "execution_count": 6,
176 | "metadata": {
177 | "collapsed": false
178 | },
179 | "outputs": [
180 | {
181 | "data": {
182 | "text/html": [
183 | "\n",
184 | "
\n",
185 | " \n",
186 | " \n",
187 | " | \n",
188 | " a | \n",
189 | " b | \n",
190 | " c | \n",
191 | " d | \n",
192 | " message | \n",
193 | "
\n",
194 | " \n",
195 | " \n",
196 | " \n",
197 | " 0 | \n",
198 | " 1 | \n",
199 | " 2 | \n",
200 | " 3 | \n",
201 | " 4 | \n",
202 | " hello | \n",
203 | "
\n",
204 | " \n",
205 | " 1 | \n",
206 | " 5 | \n",
207 | " 6 | \n",
208 | " 7 | \n",
209 | " 8 | \n",
210 | " world | \n",
211 | "
\n",
212 | " \n",
213 | " 2 | \n",
214 | " 9 | \n",
215 | " 10 | \n",
216 | " 11 | \n",
217 | " 12 | \n",
218 | " foo | \n",
219 | "
\n",
220 | " \n",
221 | "
\n",
222 | "
"
223 | ],
224 | "text/plain": [
225 | " a b c d message\n",
226 | "0 1 2 3 4 hello\n",
227 | "1 5 6 7 8 world\n",
228 | "2 9 10 11 12 foo"
229 | ]
230 | },
231 | "execution_count": 6,
232 | "metadata": {},
233 | "output_type": "execute_result"
234 | }
235 | ],
236 | "source": [
237 | "# 也可以讀入table,不過需要指定分隔符號\n",
238 | "df = pd.read_table('ex1.csv', sep = ',')\n",
239 | "df"
240 | ]
241 | },
242 | {
243 | "cell_type": "code",
244 | "execution_count": 7,
245 | "metadata": {
246 | "collapsed": false
247 | },
248 | "outputs": [
249 | {
250 | "name": "stdout",
251 | "output_type": "stream",
252 | "text": [
253 | "1,2,3,4,hello\n",
254 | "5,6,7,8,world\n",
255 | "9,10,11,12,foo\n"
256 | ]
257 | }
258 | ],
259 | "source": [
260 | "# 沒有欄位名稱列的檔案\n",
261 | "!type ex2.csv"
262 | ]
263 | },
264 | {
265 | "cell_type": "code",
266 | "execution_count": 8,
267 | "metadata": {
268 | "collapsed": false
269 | },
270 | "outputs": [
271 | {
272 | "data": {
273 | "text/html": [
274 | "\n",
275 | "
\n",
276 | " \n",
277 | " \n",
278 | " | \n",
279 | " 1 | \n",
280 | " 2 | \n",
281 | " 3 | \n",
282 | " 4 | \n",
283 | " hello | \n",
284 | "
\n",
285 | " \n",
286 | " \n",
287 | " \n",
288 | " 0 | \n",
289 | " 5 | \n",
290 | " 6 | \n",
291 | " 7 | \n",
292 | " 8 | \n",
293 | " world | \n",
294 | "
\n",
295 | " \n",
296 | " 1 | \n",
297 | " 9 | \n",
298 | " 10 | \n",
299 | " 11 | \n",
300 | " 12 | \n",
301 | " foo | \n",
302 | "
\n",
303 | " \n",
304 | "
\n",
305 | "
"
306 | ],
307 | "text/plain": [
308 | " 1 2 3 4 hello\n",
309 | "0 5 6 7 8 world\n",
310 | "1 9 10 11 12 foo"
311 | ]
312 | },
313 | "execution_count": 8,
314 | "metadata": {},
315 | "output_type": "execute_result"
316 | }
317 | ],
318 | "source": [
319 | "# 預設會把第一列當作 欄位名稱列\n",
320 | "df = pd.read_csv('ex2.csv', )\n",
321 | "df"
322 | ]
323 | },
324 | {
325 | "cell_type": "code",
326 | "execution_count": 9,
327 | "metadata": {
328 | "collapsed": false
329 | },
330 | "outputs": [
331 | {
332 | "data": {
333 | "text/html": [
334 | "\n",
335 | "
\n",
336 | " \n",
337 | " \n",
338 | " | \n",
339 | " 0 | \n",
340 | " 1 | \n",
341 | " 2 | \n",
342 | " 3 | \n",
343 | " 4 | \n",
344 | "
\n",
345 | " \n",
346 | " \n",
347 | " \n",
348 | " 0 | \n",
349 | " 1 | \n",
350 | " 2 | \n",
351 | " 3 | \n",
352 | " 4 | \n",
353 | " hello | \n",
354 | "
\n",
355 | " \n",
356 | " 1 | \n",
357 | " 5 | \n",
358 | " 6 | \n",
359 | " 7 | \n",
360 | " 8 | \n",
361 | " world | \n",
362 | "
\n",
363 | " \n",
364 | " 2 | \n",
365 | " 9 | \n",
366 | " 10 | \n",
367 | " 11 | \n",
368 | " 12 | \n",
369 | " foo | \n",
370 | "
\n",
371 | " \n",
372 | "
\n",
373 | "
"
374 | ],
375 | "text/plain": [
376 | " 0 1 2 3 4\n",
377 | "0 1 2 3 4 hello\n",
378 | "1 5 6 7 8 world\n",
379 | "2 9 10 11 12 foo"
380 | ]
381 | },
382 | "execution_count": 9,
383 | "metadata": {},
384 | "output_type": "execute_result"
385 | }
386 | ],
387 | "source": [
388 | "# 標示沒有欄位名稱列\n",
389 | "df = pd.read_csv('ex2.csv', header = None)\n",
390 | "df"
391 | ]
392 | },
393 | {
394 | "cell_type": "code",
395 | "execution_count": 10,
396 | "metadata": {
397 | "collapsed": false
398 | },
399 | "outputs": [
400 | {
401 | "data": {
402 | "text/html": [
403 | "\n",
404 | "
\n",
405 | " \n",
406 | " \n",
407 | " | \n",
408 | " a | \n",
409 | " b | \n",
410 | " c | \n",
411 | " d | \n",
412 | " message | \n",
413 | "
\n",
414 | " \n",
415 | " \n",
416 | " \n",
417 | " 0 | \n",
418 | " 1 | \n",
419 | " 2 | \n",
420 | " 3 | \n",
421 | " 4 | \n",
422 | " hello | \n",
423 | "
\n",
424 | " \n",
425 | " 1 | \n",
426 | " 5 | \n",
427 | " 6 | \n",
428 | " 7 | \n",
429 | " 8 | \n",
430 | " world | \n",
431 | "
\n",
432 | " \n",
433 | " 2 | \n",
434 | " 9 | \n",
435 | " 10 | \n",
436 | " 11 | \n",
437 | " 12 | \n",
438 | " foo | \n",
439 | "
\n",
440 | " \n",
441 | "
\n",
442 | "
"
443 | ],
444 | "text/plain": [
445 | " a b c d message\n",
446 | "0 1 2 3 4 hello\n",
447 | "1 5 6 7 8 world\n",
448 | "2 9 10 11 12 foo"
449 | ]
450 | },
451 | "execution_count": 10,
452 | "metadata": {},
453 | "output_type": "execute_result"
454 | }
455 | ],
456 | "source": [
457 | "# 自定義 欄位名稱\n",
458 | "fields = ['a', 'b', 'c', 'd', 'message']\n",
459 | "df = pd.read_csv('ex2.csv', names = fields)\n",
460 | "df"
461 | ]
462 | },
463 | {
464 | "cell_type": "code",
465 | "execution_count": 11,
466 | "metadata": {
467 | "collapsed": false
468 | },
469 | "outputs": [
470 | {
471 | "data": {
472 | "text/html": [
473 | "\n",
474 | "
\n",
475 | " \n",
476 | " \n",
477 | " | \n",
478 | " a | \n",
479 | " b | \n",
480 | " c | \n",
481 | " d | \n",
482 | "
\n",
483 | " \n",
484 | " message | \n",
485 | " | \n",
486 | " | \n",
487 | " | \n",
488 | " | \n",
489 | "
\n",
490 | " \n",
491 | " \n",
492 | " \n",
493 | " hello | \n",
494 | " 1 | \n",
495 | " 2 | \n",
496 | " 3 | \n",
497 | " 4 | \n",
498 | "
\n",
499 | " \n",
500 | " world | \n",
501 | " 5 | \n",
502 | " 6 | \n",
503 | " 7 | \n",
504 | " 8 | \n",
505 | "
\n",
506 | " \n",
507 | " foo | \n",
508 | " 9 | \n",
509 | " 10 | \n",
510 | " 11 | \n",
511 | " 12 | \n",
512 | "
\n",
513 | " \n",
514 | "
\n",
515 | "
"
516 | ],
517 | "text/plain": [
518 | " a b c d\n",
519 | "message \n",
520 | "hello 1 2 3 4\n",
521 | "world 5 6 7 8\n",
522 | "foo 9 10 11 12"
523 | ]
524 | },
525 | "execution_count": 11,
526 | "metadata": {},
527 | "output_type": "execute_result"
528 | }
529 | ],
530 | "source": [
531 | "# 可以 使用 index_col 參數,將某一欄設定為DataFrame的索引\n",
532 | "fields = ['a', 'b', 'c', 'd', 'message']\n",
533 | "df = pd.read_csv('ex2.csv', names = fields, index_col = 'message')\n",
534 | "df"
535 | ]
536 | },
537 | {
538 | "cell_type": "code",
539 | "execution_count": 12,
540 | "metadata": {
541 | "collapsed": false
542 | },
543 | "outputs": [
544 | {
545 | "name": "stdout",
546 | "output_type": "stream",
547 | "text": [
548 | "key1,key2,value1,value2\n",
549 | "one,a,1,2\n",
550 | "one,b,3,4\n",
551 | "one,c,5,6\n",
552 | "one,d,7,8\n",
553 | "two,a,9,10\n",
554 | "two,b,11,12\n",
555 | "two,c,13,14\n",
556 | "two,d,15,16\n"
557 | ]
558 | },
559 | {
560 | "data": {
561 | "text/html": [
562 | "\n",
563 | "
\n",
564 | " \n",
565 | " \n",
566 | " | \n",
567 | " | \n",
568 | " value1 | \n",
569 | " value2 | \n",
570 | "
\n",
571 | " \n",
572 | " key1 | \n",
573 | " key2 | \n",
574 | " | \n",
575 | " | \n",
576 | "
\n",
577 | " \n",
578 | " \n",
579 | " \n",
580 | " one | \n",
581 | " a | \n",
582 | " 1 | \n",
583 | " 2 | \n",
584 | "
\n",
585 | " \n",
586 | " b | \n",
587 | " 3 | \n",
588 | " 4 | \n",
589 | "
\n",
590 | " \n",
591 | " c | \n",
592 | " 5 | \n",
593 | " 6 | \n",
594 | "
\n",
595 | " \n",
596 | " d | \n",
597 | " 7 | \n",
598 | " 8 | \n",
599 | "
\n",
600 | " \n",
601 | " two | \n",
602 | " a | \n",
603 | " 9 | \n",
604 | " 10 | \n",
605 | "
\n",
606 | " \n",
607 | " b | \n",
608 | " 11 | \n",
609 | " 12 | \n",
610 | "
\n",
611 | " \n",
612 | " c | \n",
613 | " 13 | \n",
614 | " 14 | \n",
615 | "
\n",
616 | " \n",
617 | " d | \n",
618 | " 15 | \n",
619 | " 16 | \n",
620 | "
\n",
621 | " \n",
622 | "
\n",
623 | "
"
624 | ],
625 | "text/plain": [
626 | " value1 value2\n",
627 | "key1 key2 \n",
628 | "one a 1 2\n",
629 | " b 3 4\n",
630 | " c 5 6\n",
631 | " d 7 8\n",
632 | "two a 9 10\n",
633 | " b 11 12\n",
634 | " c 13 14\n",
635 | " d 15 16"
636 | ]
637 | },
638 | "execution_count": 12,
639 | "metadata": {},
640 | "output_type": "execute_result"
641 | }
642 | ],
643 | "source": [
644 | "# 可以 使用 index_col 參數,將多個欄設定為DataFrame的層次化索引 \n",
645 | "!type ex3.csv\n",
646 | "df = pd.read_csv('ex3.csv', index_col = ['key1', 'key2'])\n",
647 | "df"
648 | ]
649 | },
650 | {
651 | "cell_type": "code",
652 | "execution_count": 13,
653 | "metadata": {
654 | "collapsed": false
655 | },
656 | "outputs": [
657 | {
658 | "name": "stdout",
659 | "output_type": "stream",
660 | "text": [
661 | "\tA\tB\tC\n",
662 | "aaa \t-0.264 \t-1.026 \t-0.619\n",
663 | "bbb\t 0.927\t 0.302\t -0.032\n",
664 | "ccc -0.265\t -0.385\t -0.217\n",
665 | "\t\n"
666 | ]
667 | },
668 | {
669 | "data": {
670 | "text/html": [
671 | "\n",
672 | "
\n",
673 | " \n",
674 | " \n",
675 | " | \n",
676 | " A | \n",
677 | " B | \n",
678 | " C | \n",
679 | "
\n",
680 | " \n",
681 | " \n",
682 | " \n",
683 | " aaa | \n",
684 | " -0.264 | \n",
685 | " -1.026 | \n",
686 | " -0.619 | \n",
687 | "
\n",
688 | " \n",
689 | " bbb | \n",
690 | " 0.927 | \n",
691 | " 0.302 | \n",
692 | " -0.032 | \n",
693 | "
\n",
694 | " \n",
695 | " ccc | \n",
696 | " -0.265 | \n",
697 | " -0.385 | \n",
698 | " -0.217 | \n",
699 | "
\n",
700 | " \n",
701 | "
\n",
702 | "
"
703 | ],
704 | "text/plain": [
705 | " A B C\n",
706 | "aaa -0.264 -1.026 -0.619\n",
707 | "bbb 0.927 0.302 -0.032\n",
708 | "ccc -0.265 -0.385 -0.217"
709 | ]
710 | },
711 | "execution_count": 13,
712 | "metadata": {},
713 | "output_type": "execute_result"
714 | }
715 | ],
716 | "source": [
717 | "# 如果不是以固定的分隔符號來分隔字段,可以用 read_table + regex 作為 sep參數\n",
718 | "# 由於欄位名稱的數量比資料的欄位數少一個,因此read_table推斷第一欄應該是DataFrame的索引\n",
719 | "# 以不定數量的空白做分隔\n",
720 | "!type \"ex3 - 1.csv\" \n",
721 | "df = pd.read_table('ex3 - 1.csv', sep = '\\s+')\n",
722 | "df"
723 | ]
724 | },
725 | {
726 | "cell_type": "code",
727 | "execution_count": 14,
728 | "metadata": {
729 | "collapsed": false
730 | },
731 | "outputs": [
732 | {
733 | "name": "stdout",
734 | "output_type": "stream",
735 | "text": [
736 | "# hey!\n",
737 | "a,b,c,d,message\n",
738 | "# just wanted to make things more difficult for you\n",
739 | "# who read CSV files with computers, anyway?\n",
740 | "1,2,3,4,hello\n",
741 | "5,6,7,8,world\n",
742 | "9,10,11,12,foo\n"
743 | ]
744 | },
745 | {
746 | "data": {
747 | "text/html": [
748 | "\n",
749 | "
\n",
750 | " \n",
751 | " \n",
752 | " | \n",
753 | " a | \n",
754 | " b | \n",
755 | " c | \n",
756 | " d | \n",
757 | "
\n",
758 | " \n",
759 | " message | \n",
760 | " | \n",
761 | " | \n",
762 | " | \n",
763 | " | \n",
764 | "
\n",
765 | " \n",
766 | " \n",
767 | " \n",
768 | " hello | \n",
769 | " 1 | \n",
770 | " 2 | \n",
771 | " 3 | \n",
772 | " 4 | \n",
773 | "
\n",
774 | " \n",
775 | " world | \n",
776 | " 5 | \n",
777 | " 6 | \n",
778 | " 7 | \n",
779 | " 8 | \n",
780 | "
\n",
781 | " \n",
782 | " foo | \n",
783 | " 9 | \n",
784 | " 10 | \n",
785 | " 11 | \n",
786 | " 12 | \n",
787 | "
\n",
788 | " \n",
789 | "
\n",
790 | "
"
791 | ],
792 | "text/plain": [
793 | " a b c d\n",
794 | "message \n",
795 | "hello 1 2 3 4\n",
796 | "world 5 6 7 8\n",
797 | "foo 9 10 11 12"
798 | ]
799 | },
800 | "execution_count": 14,
801 | "metadata": {},
802 | "output_type": "execute_result"
803 | }
804 | ],
805 | "source": [
806 | "# 讀檔時,可以用 skiprows 來跳過指定的 rows\n",
807 | "!type ex4.csv\n",
808 | "df = pd.read_csv('ex4.csv', skiprows = [0, 2, 3], index_col = 'message')\n",
809 | "df"
810 | ]
811 | },
812 | {
813 | "cell_type": "code",
814 | "execution_count": 15,
815 | "metadata": {
816 | "collapsed": false
817 | },
818 | "outputs": [
819 | {
820 | "name": "stdout",
821 | "output_type": "stream",
822 | "text": [
823 | "something,a,b,c,d,message\n",
824 | "one,1,2,3,4,NA\n",
825 | "two,5,6,,8,world\n",
826 | "three,9,10,11,12,foo \n"
827 | ]
828 | },
829 | {
830 | "data": {
831 | "text/html": [
832 | "\n",
833 | "
\n",
834 | " \n",
835 | " \n",
836 | " | \n",
837 | " a | \n",
838 | " b | \n",
839 | " c | \n",
840 | " d | \n",
841 | " message | \n",
842 | "
\n",
843 | " \n",
844 | " something | \n",
845 | " | \n",
846 | " | \n",
847 | " | \n",
848 | " | \n",
849 | " | \n",
850 | "
\n",
851 | " \n",
852 | " \n",
853 | " \n",
854 | " one | \n",
855 | " 1 | \n",
856 | " 2 | \n",
857 | " 3.0 | \n",
858 | " 4 | \n",
859 | " NaN | \n",
860 | "
\n",
861 | " \n",
862 | " two | \n",
863 | " 5 | \n",
864 | " 6 | \n",
865 | " NaN | \n",
866 | " 8 | \n",
867 | " world | \n",
868 | "
\n",
869 | " \n",
870 | " three | \n",
871 | " 9 | \n",
872 | " 10 | \n",
873 | " 11.0 | \n",
874 | " 12 | \n",
875 | " foo | \n",
876 | "
\n",
877 | " \n",
878 | "
\n",
879 | "
"
880 | ],
881 | "text/plain": [
882 | " a b c d message\n",
883 | "something \n",
884 | "one 1 2 3.0 4 NaN\n",
885 | "two 5 6 NaN 8 world\n",
886 | "three 9 10 11.0 12 foo "
887 | ]
888 | },
889 | "execution_count": 15,
890 | "metadata": {},
891 | "output_type": "execute_result"
892 | }
893 | ],
894 | "source": [
895 | "# 缺失數據的處理\n",
896 | "# read_csv 會自動判斷,然後以NaN標示缺失數據的位置\n",
897 | "!type ex5.csv\n",
898 | "df = pd.read_csv('ex5.csv', index_col = 'something')\n",
899 | "df"
900 | ]
901 | },
902 | {
903 | "cell_type": "code",
904 | "execution_count": 16,
905 | "metadata": {
906 | "collapsed": false
907 | },
908 | "outputs": [
909 | {
910 | "data": {
911 | "text/html": [
912 | "\n",
913 | "
\n",
914 | " \n",
915 | " \n",
916 | " | \n",
917 | " a | \n",
918 | " b | \n",
919 | " c | \n",
920 | " d | \n",
921 | " message | \n",
922 | "
\n",
923 | " \n",
924 | " something | \n",
925 | " | \n",
926 | " | \n",
927 | " | \n",
928 | " | \n",
929 | " | \n",
930 | "
\n",
931 | " \n",
932 | " \n",
933 | " \n",
934 | " one | \n",
935 | " False | \n",
936 | " False | \n",
937 | " False | \n",
938 | " False | \n",
939 | " True | \n",
940 | "
\n",
941 | " \n",
942 | " two | \n",
943 | " False | \n",
944 | " False | \n",
945 | " True | \n",
946 | " False | \n",
947 | " False | \n",
948 | "
\n",
949 | " \n",
950 | " three | \n",
951 | " False | \n",
952 | " False | \n",
953 | " False | \n",
954 | " False | \n",
955 | " False | \n",
956 | "
\n",
957 | " \n",
958 | "
\n",
959 | "
"
960 | ],
961 | "text/plain": [
962 | " a b c d message\n",
963 | "something \n",
964 | "one False False False False True\n",
965 | "two False False True False False\n",
966 | "three False False False False False"
967 | ]
968 | },
969 | "execution_count": 16,
970 | "metadata": {},
971 | "output_type": "execute_result"
972 | }
973 | ],
974 | "source": [
975 | "# isnull(),判斷元素是否為NaN\n",
976 | "df.isnull()"
977 | ]
978 | },
979 | {
980 | "cell_type": "code",
981 | "execution_count": 17,
982 | "metadata": {
983 | "collapsed": false
984 | },
985 | "outputs": [
986 | {
987 | "data": {
988 | "text/html": [
989 | "\n",
990 | "
\n",
991 | " \n",
992 | " \n",
993 | " | \n",
994 | " a | \n",
995 | " b | \n",
996 | " c | \n",
997 | " d | \n",
998 | " message | \n",
999 | "
\n",
1000 | " \n",
1001 | " something | \n",
1002 | " | \n",
1003 | " | \n",
1004 | " | \n",
1005 | " | \n",
1006 | " | \n",
1007 | "
\n",
1008 | " \n",
1009 | " \n",
1010 | " \n",
1011 | " one | \n",
1012 | " False | \n",
1013 | " False | \n",
1014 | " False | \n",
1015 | " False | \n",
1016 | " True | \n",
1017 | "
\n",
1018 | " \n",
1019 | " two | \n",
1020 | " False | \n",
1021 | " False | \n",
1022 | " True | \n",
1023 | " False | \n",
1024 | " False | \n",
1025 | "
\n",
1026 | " \n",
1027 | " three | \n",
1028 | " False | \n",
1029 | " False | \n",
1030 | " False | \n",
1031 | " False | \n",
1032 | " False | \n",
1033 | "
\n",
1034 | " \n",
1035 | "
\n",
1036 | "
"
1037 | ],
1038 | "text/plain": [
1039 | " a b c d message\n",
1040 | "something \n",
1041 | "one False False False False True\n",
1042 | "two False False True False False\n",
1043 | "three False False False False False"
1044 | ]
1045 | },
1046 | "execution_count": 17,
1047 | "metadata": {},
1048 | "output_type": "execute_result"
1049 | }
1050 | ],
1051 | "source": [
1052 | "pd.isnull(df)"
1053 | ]
1054 | },
1055 | {
1056 | "cell_type": "code",
1057 | "execution_count": 18,
1058 | "metadata": {
1059 | "collapsed": false
1060 | },
1061 | "outputs": [
1062 | {
1063 | "data": {
1064 | "text/html": [
1065 | "\n",
1066 | "
\n",
1067 | " \n",
1068 | " \n",
1069 | " | \n",
1070 | " a | \n",
1071 | " b | \n",
1072 | " c | \n",
1073 | " d | \n",
1074 | " message | \n",
1075 | "
\n",
1076 | " \n",
1077 | " something | \n",
1078 | " | \n",
1079 | " | \n",
1080 | " | \n",
1081 | " | \n",
1082 | " | \n",
1083 | "
\n",
1084 | " \n",
1085 | " \n",
1086 | " \n",
1087 | " one | \n",
1088 | " 1 | \n",
1089 | " 2 | \n",
1090 | " 3.0 | \n",
1091 | " 4 | \n",
1092 | " NaN | \n",
1093 | "
\n",
1094 | " \n",
1095 | " two | \n",
1096 | " 5 | \n",
1097 | " 6 | \n",
1098 | " NaN | \n",
1099 | " 8 | \n",
1100 | " world | \n",
1101 | "
\n",
1102 | " \n",
1103 | " three | \n",
1104 | " 9 | \n",
1105 | " 10 | \n",
1106 | " 11.0 | \n",
1107 | " 12 | \n",
1108 | " foo | \n",
1109 | "
\n",
1110 | " \n",
1111 | "
\n",
1112 | "
"
1113 | ],
1114 | "text/plain": [
1115 | " a b c d message\n",
1116 | "something \n",
1117 | "one 1 2 3.0 4 NaN\n",
1118 | "two 5 6 NaN 8 world\n",
1119 | "three 9 10 11.0 12 foo "
1120 | ]
1121 | },
1122 | "execution_count": 18,
1123 | "metadata": {},
1124 | "output_type": "execute_result"
1125 | }
1126 | ],
1127 | "source": [
1128 | "# na_values 參數可指定用於標示缺失數據的字串\n",
1129 | "df = pd.read_csv('ex5.csv', index_col = 'something', na_values = ['NULL'])\n",
1130 | "df"
1131 | ]
1132 | },
1133 | {
1134 | "cell_type": "code",
1135 | "execution_count": 19,
1136 | "metadata": {
1137 | "collapsed": false
1138 | },
1139 | "outputs": [
1140 | {
1141 | "data": {
1142 | "text/html": [
1143 | "\n",
1144 | "
\n",
1145 | " \n",
1146 | " \n",
1147 | " | \n",
1148 | " something | \n",
1149 | " a | \n",
1150 | " b | \n",
1151 | " c | \n",
1152 | " d | \n",
1153 | " message | \n",
1154 | "
\n",
1155 | " \n",
1156 | " \n",
1157 | " \n",
1158 | " 0 | \n",
1159 | " one | \n",
1160 | " 1 | \n",
1161 | " 2 | \n",
1162 | " 3.0 | \n",
1163 | " 4 | \n",
1164 | " NaN | \n",
1165 | "
\n",
1166 | " \n",
1167 | " 1 | \n",
1168 | " NaN | \n",
1169 | " 5 | \n",
1170 | " 6 | \n",
1171 | " NaN | \n",
1172 | " 8 | \n",
1173 | " world | \n",
1174 | "
\n",
1175 | " \n",
1176 | " 2 | \n",
1177 | " three | \n",
1178 | " 9 | \n",
1179 | " 10 | \n",
1180 | " 11.0 | \n",
1181 | " 12 | \n",
1182 | " foo | \n",
1183 | "
\n",
1184 | " \n",
1185 | "
\n",
1186 | "
"
1187 | ],
1188 | "text/plain": [
1189 | " something a b c d message\n",
1190 | "0 one 1 2 3.0 4 NaN\n",
1191 | "1 NaN 5 6 NaN 8 world\n",
1192 | "2 three 9 10 11.0 12 foo "
1193 | ]
1194 | },
1195 | "execution_count": 19,
1196 | "metadata": {},
1197 | "output_type": "execute_result"
1198 | }
1199 | ],
1200 | "source": [
1201 | "# 為各欄位分別指定不同的 缺失值標示字串\n",
1202 | "sentinels = {'message': ['foo', 'NA'], 'something': ['two']}\n",
1203 | "df = pd.read_csv('ex5.csv', na_values = sentinels)\n",
1204 | "df"
1205 | ]
1206 | },
1207 | {
1208 | "cell_type": "markdown",
1209 | "metadata": {},
1210 | "source": [
1211 | "### 逐塊讀取文本文件"
1212 | ]
1213 | },
1214 | {
1215 | "cell_type": "code",
1216 | "execution_count": 20,
1217 | "metadata": {
1218 | "collapsed": false
1219 | },
1220 | "outputs": [
1221 | {
1222 | "name": "stdout",
1223 | "output_type": "stream",
1224 | "text": [
1225 | "something,a,b,c,d,message\n",
1226 | "one,1,2,3,4,NA\n",
1227 | "two,5,6,,8,world\n",
1228 | "three,9,10,11,12,foo \n"
1229 | ]
1230 | },
1231 | {
1232 | "data": {
1233 | "text/html": [
1234 | "\n",
1235 | "
\n",
1236 | " \n",
1237 | " \n",
1238 | " | \n",
1239 | " something | \n",
1240 | " a | \n",
1241 | " b | \n",
1242 | " c | \n",
1243 | " d | \n",
1244 | " message | \n",
1245 | "
\n",
1246 | " \n",
1247 | " \n",
1248 | " \n",
1249 | " 0 | \n",
1250 | " one | \n",
1251 | " 1 | \n",
1252 | " 2 | \n",
1253 | " 3.0 | \n",
1254 | " 4 | \n",
1255 | " NaN | \n",
1256 | "
\n",
1257 | " \n",
1258 | " 1 | \n",
1259 | " two | \n",
1260 | " 5 | \n",
1261 | " 6 | \n",
1262 | " NaN | \n",
1263 | " 8 | \n",
1264 | " world | \n",
1265 | "
\n",
1266 | " \n",
1267 | "
\n",
1268 | "
"
1269 | ],
1270 | "text/plain": [
1271 | " something a b c d message\n",
1272 | "0 one 1 2 3.0 4 NaN\n",
1273 | "1 two 5 6 NaN 8 world"
1274 | ]
1275 | },
1276 | "execution_count": 20,
1277 | "metadata": {},
1278 | "output_type": "execute_result"
1279 | }
1280 | ],
1281 | "source": [
1282 | "# 設定 nrows參數,設定讀入的列數\n",
1283 | "!type ex5.csv\n",
1284 | "df = pd.read_csv('ex5.csv', nrows = 2)\n",
1285 | "df"
1286 | ]
1287 | },
1288 | {
1289 | "cell_type": "code",
1290 | "execution_count": 21,
1291 | "metadata": {
1292 | "collapsed": false
1293 | },
1294 | "outputs": [
1295 | {
1296 | "name": "stdout",
1297 | "output_type": "stream",
1298 | "text": [
1299 | "something,a,b,c,d,message\n",
1300 | "one,1,2,3,4,NA\n",
1301 | "two,5,6,,8,world\n",
1302 | "three,9,10,11,12,foo \n"
1303 | ]
1304 | },
1305 | {
1306 | "data": {
1307 | "text/plain": [
1308 | ""
1309 | ]
1310 | },
1311 | "execution_count": 21,
1312 | "metadata": {},
1313 | "output_type": "execute_result"
1314 | }
1315 | ],
1316 | "source": [
1317 | "# 如果要逐塊讀取,則設定chunksize\n",
1318 | "!type ex5.csv\n",
1319 | "chunker = pd.read_csv('ex5.csv', chunksize = 2)\n",
1320 | "chunker"
1321 | ]
1322 | },
1323 | {
1324 | "cell_type": "code",
1325 | "execution_count": 22,
1326 | "metadata": {
1327 | "collapsed": false
1328 | },
1329 | "outputs": [
1330 | {
1331 | "data": {
1332 | "text/plain": [
1333 | "two 1.0\n",
1334 | "three 1.0\n",
1335 | "one 1.0\n",
1336 | "dtype: float64"
1337 | ]
1338 | },
1339 | "execution_count": 22,
1340 | "metadata": {},
1341 | "output_type": "execute_result"
1342 | }
1343 | ],
1344 | "source": [
1345 | "tot = Series([])\n",
1346 | "for piece in chunker:\n",
1347 | " tot = tot.add(piece['something'].value_counts(), fill_value = 0)\n",
1348 | "tot = tot.sort_values(ascending = False)\n",
1349 | "tot"
1350 | ]
1351 | },
1352 | {
1353 | "cell_type": "markdown",
1354 | "metadata": {},
1355 | "source": [
1356 | "## 將數據寫出到文本格式"
1357 | ]
1358 | },
1359 | {
1360 | "cell_type": "code",
1361 | "execution_count": 23,
1362 | "metadata": {
1363 | "collapsed": false
1364 | },
1365 | "outputs": [
1366 | {
1367 | "name": "stdout",
1368 | "output_type": "stream",
1369 | "text": [
1370 | "something,a,b,c,d,message\n",
1371 | "one,1,2,3,4,NA\n",
1372 | "two,5,6,,8,world\n",
1373 | "three,9,10,11,12,foo \n"
1374 | ]
1375 | },
1376 | {
1377 | "data": {
1378 | "text/html": [
1379 | "\n",
1380 | "
\n",
1381 | " \n",
1382 | " \n",
1383 | " | \n",
1384 | " something | \n",
1385 | " a | \n",
1386 | " b | \n",
1387 | " c | \n",
1388 | " d | \n",
1389 | " message | \n",
1390 | "
\n",
1391 | " \n",
1392 | " \n",
1393 | " \n",
1394 | " 0 | \n",
1395 | " one | \n",
1396 | " 1 | \n",
1397 | " 2 | \n",
1398 | " 3.0 | \n",
1399 | " 4 | \n",
1400 | " NaN | \n",
1401 | "
\n",
1402 | " \n",
1403 | " 1 | \n",
1404 | " two | \n",
1405 | " 5 | \n",
1406 | " 6 | \n",
1407 | " NaN | \n",
1408 | " 8 | \n",
1409 | " world | \n",
1410 | "
\n",
1411 | " \n",
1412 | " 2 | \n",
1413 | " three | \n",
1414 | " 9 | \n",
1415 | " 10 | \n",
1416 | " 11.0 | \n",
1417 | " 12 | \n",
1418 | " foo | \n",
1419 | "
\n",
1420 | " \n",
1421 | "
\n",
1422 | "
"
1423 | ],
1424 | "text/plain": [
1425 | " something a b c d message\n",
1426 | "0 one 1 2 3.0 4 NaN\n",
1427 | "1 two 5 6 NaN 8 world\n",
1428 | "2 three 9 10 11.0 12 foo "
1429 | ]
1430 | },
1431 | "execution_count": 23,
1432 | "metadata": {},
1433 | "output_type": "execute_result"
1434 | }
1435 | ],
1436 | "source": [
1437 | "!type ex5.csv\n",
1438 | "df = pd.read_csv('ex5.csv')\n",
1439 | "df"
1440 | ]
1441 | },
1442 | {
1443 | "cell_type": "code",
1444 | "execution_count": 24,
1445 | "metadata": {
1446 | "collapsed": false
1447 | },
1448 | "outputs": [
1449 | {
1450 | "name": "stdout",
1451 | "output_type": "stream",
1452 | "text": [
1453 | ",something,a,b,c,d,message\n",
1454 | "0,one,1,2,3.0,4,\n",
1455 | "1,two,5,6,,8,world\n",
1456 | "2,three,9,10,11.0,12,foo \n"
1457 | ]
1458 | }
1459 | ],
1460 | "source": [
1461 | "# 以 to_csv() 將數據寫出到一個 以逗號分隔 的檔案中\n",
1462 | "df.to_csv('ex5-1.csv')\n",
1463 | "!type \"ex5-1.csv\""
1464 | ]
1465 | },
1466 | {
1467 | "cell_type": "code",
1468 | "execution_count": 25,
1469 | "metadata": {
1470 | "collapsed": false
1471 | },
1472 | "outputs": [
1473 | {
1474 | "name": "stdout",
1475 | "output_type": "stream",
1476 | "text": [
1477 | "|something|a|b|c|d|message\n",
1478 | "0|one|1|2|3.0|4|\n",
1479 | "1|two|5|6||8|world\n",
1480 | "2|three|9|10|11.0|12|foo \n"
1481 | ]
1482 | }
1483 | ],
1484 | "source": [
1485 | "# 寫出的時候,可以設定 sep 參數 指定其他的分隔符號\n",
1486 | "df.to_csv('ex5-1.csv', sep = '|')\n",
1487 | "!type \"ex5-1.csv\""
1488 | ]
1489 | },
1490 | {
1491 | "cell_type": "code",
1492 | "execution_count": 26,
1493 | "metadata": {
1494 | "collapsed": false
1495 | },
1496 | "outputs": [
1497 | {
1498 | "name": "stdout",
1499 | "output_type": "stream",
1500 | "text": [
1501 | ",something,a,b,c,d,message\n",
1502 | "0,one,1,2,3.0,4,NULL\n",
1503 | "1,two,5,6,NULL,8,world\n",
1504 | "2,three,9,10,11.0,12,foo \n"
1505 | ]
1506 | }
1507 | ],
1508 | "source": [
1509 | "# 設定 na_rep 參數,以其他的符號 明確地標示 缺失值\n",
1510 | "df.to_csv('ex5-1.csv', na_rep = 'NULL')\n",
1511 | "!type \"ex5-1.csv\""
1512 | ]
1513 | },
1514 | {
1515 | "cell_type": "code",
1516 | "execution_count": 27,
1517 | "metadata": {
1518 | "collapsed": false
1519 | },
1520 | "outputs": [
1521 | {
1522 | "name": "stdout",
1523 | "output_type": "stream",
1524 | "text": [
1525 | "one,1,2,3.0,4,NULL\n",
1526 | "two,5,6,NULL,8,world\n",
1527 | "three,9,10,11.0,12,foo \n"
1528 | ]
1529 | }
1530 | ],
1531 | "source": [
1532 | "# 可以禁止列出 row, column的標籤\n",
1533 | "# 不輸出index、header\n",
1534 | "df.to_csv('ex5-1.csv', na_rep = 'NULL', index = False, header = False) \n",
1535 | "!type \"ex5-1.csv\""
1536 | ]
1537 | },
1538 | {
1539 | "cell_type": "code",
1540 | "execution_count": 28,
1541 | "metadata": {
1542 | "collapsed": false
1543 | },
1544 | "outputs": [
1545 | {
1546 | "name": "stdout",
1547 | "output_type": "stream",
1548 | "text": [
1549 | "something,a,b,c,d,message\n",
1550 | "one,1,2,3.0,4,NULL\n",
1551 | "two,5,6,NULL,8,world\n",
1552 | "three,9,10,11.0,12,foo \n"
1553 | ]
1554 | }
1555 | ],
1556 | "source": [
1557 | "# 不輸出index\n",
1558 | "df.to_csv('ex5-1.csv', na_rep = 'NULL', index = False) \n",
1559 | "!type \"ex5-1.csv\""
1560 | ]
1561 | },
1562 | {
1563 | "cell_type": "code",
1564 | "execution_count": 29,
1565 | "metadata": {
1566 | "collapsed": false
1567 | },
1568 | "outputs": [
1569 | {
1570 | "name": "stdout",
1571 | "output_type": "stream",
1572 | "text": [
1573 | "something,a,b,c,d,message\n",
1574 | "one,1,2,3.0,4,\n",
1575 | "two,5,6,,8,world\n",
1576 | "three,9,10,11.0,12,foo \n"
1577 | ]
1578 | }
1579 | ],
1580 | "source": [
1581 | "# 設定 cols 參數,只寫出一部分的欄位\n",
1582 | "df\n",
1583 | "df.to_csv(\"ex5-1.csv\", index = False, cols = ['a', 'b', 'c']) # 無效:cols 參數已更名為 columns,應改用 columns = ['a', 'b', 'c']\n",
1584 | "!type \"ex5-1.csv\""
1585 | ]
1586 | },
1587 | {
1588 | "cell_type": "code",
1589 | "execution_count": 30,
1590 | "metadata": {
1591 | "collapsed": false
1592 | },
1593 | "outputs": [
1594 | {
1595 | "data": {
1596 | "text/plain": [
1597 | "DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03', '2000-01-04',\n",
1598 | " '2000-01-05', '2000-01-06', '2000-01-07'],\n",
1599 | " dtype='datetime64[ns]', freq='D')"
1600 | ]
1601 | },
1602 | "execution_count": 30,
1603 | "metadata": {},
1604 | "output_type": "execute_result"
1605 | }
1606 | ],
1607 | "source": [
1608 | "# Series 也有to_csv方法\n",
1609 | "dates = pd.date_range('1/1/2000', periods = 7)\n",
1610 | "dates"
1611 | ]
1612 | },
1613 | {
1614 | "cell_type": "code",
1615 | "execution_count": 31,
1616 | "metadata": {
1617 | "collapsed": false
1618 | },
1619 | "outputs": [
1620 | {
1621 | "data": {
1622 | "text/plain": [
1623 | "2000-01-01 0\n",
1624 | "2000-01-02 1\n",
1625 | "2000-01-03 2\n",
1626 | "2000-01-04 3\n",
1627 | "2000-01-05 4\n",
1628 | "2000-01-06 5\n",
1629 | "2000-01-07 6\n",
1630 | "Freq: D, dtype: int32"
1631 | ]
1632 | },
1633 | "execution_count": 31,
1634 | "metadata": {},
1635 | "output_type": "execute_result"
1636 | }
1637 | ],
1638 | "source": [
1639 | "ts = Series(np.arange(7), index = dates)\n",
1640 | "ts"
1641 | ]
1642 | },
1643 | {
1644 | "cell_type": "code",
1645 | "execution_count": 32,
1646 | "metadata": {
1647 | "collapsed": false
1648 | },
1649 | "outputs": [
1650 | {
1651 | "name": "stdout",
1652 | "output_type": "stream",
1653 | "text": [
1654 | "2000-01-01,0\n",
1655 | "2000-01-02,1\n",
1656 | "2000-01-03,2\n",
1657 | "2000-01-04,3\n",
1658 | "2000-01-05,4\n",
1659 | "2000-01-06,5\n",
1660 | "2000-01-07,6\n"
1661 | ]
1662 | }
1663 | ],
1664 | "source": [
1665 | "# Series物件 也有to_csv方法\n",
1666 | "ts.to_csv('treseries.csv')\n",
1667 | "!type \"treseries.csv\""
1668 | ]
1669 | },
1670 | {
1671 | "cell_type": "code",
1672 | "execution_count": 33,
1673 | "metadata": {
1674 | "collapsed": false
1675 | },
1676 | "outputs": [
1677 | {
1678 | "name": "stdout",
1679 | "output_type": "stream",
1680 | "text": [
1681 | "2000-01-01,0\n",
1682 | "2000-01-02,1\n",
1683 | "2000-01-03,2\n",
1684 | "2000-01-04,3\n",
1685 | "2000-01-05,4\n",
1686 | "2000-01-06,5\n",
1687 | "2000-01-07,6\n"
1688 | ]
1689 | }
1690 | ],
1691 | "source": [
1692 | "# Series類別 也有to_csv方法 (頂層)\n",
1693 | "Series.to_csv(ts, 'treseries.csv')\n",
1694 | "!type \"treseries.csv\""
1695 | ]
1696 | },
1697 | {
1698 | "cell_type": "code",
1699 | "execution_count": 34,
1700 | "metadata": {
1701 | "collapsed": false
1702 | },
1703 | "outputs": [
1704 | {
1705 | "data": {
1706 | "text/plain": [
1707 | "2000-01-01 0\n",
1708 | "2000-01-02 1\n",
1709 | "2000-01-03 2\n",
1710 | "2000-01-04 3\n",
1711 | "2000-01-05 4\n",
1712 | "2000-01-06 5\n",
1713 | "2000-01-07 6\n",
1714 | "dtype: int64"
1715 | ]
1716 | },
1717 | "execution_count": 34,
1718 | "metadata": {},
1719 | "output_type": "execute_result"
1720 | }
1721 | ],
1722 | "source": [
1723 | "# 使用 from_csv 將檔案讀入成為 Series\n",
1724 | "# 有 date欄位,須設定 parse_dates 參數\n",
1725 | "ts = Series.from_csv('treseries.csv', parse_dates = True)\n",
1726 | "ts"
1727 | ]
1728 | },
1729 | {
1730 | "cell_type": "markdown",
1731 | "metadata": {},
1732 | "source": [
1733 | "## JSON(JavaScript Object Notation)數據"
1734 | ]
1735 | },
1736 | {
1737 | "cell_type": "code",
1738 | "execution_count": 35,
1739 | "metadata": {
1740 | "collapsed": true
1741 | },
1742 | "outputs": [],
1743 | "source": [
1744 | "obj = \"\"\"\n",
1745 | "{\n",
1746 | "\"name\": \"Wes\", \n",
1747 | "\"place_lived\": [\"United States\", \"Spain\", \"Germany\"],\n",
1748 | "\"pet\": null,\n",
1749 | "\"siblings\": [{\"name\": \"Scott\", \"age\": 25, \"pet\": \"Zuko\"}, {\"name\": \"Wei\", \"age\": 25, \"pet\": \"Cisco\"}]\n",
1750 | "}\n",
1751 | "\"\"\""
1752 | ]
1753 | },
1754 | {
1755 | "cell_type": "code",
1756 | "execution_count": 36,
1757 | "metadata": {
1758 | "collapsed": false
1759 | },
1760 | "outputs": [
1761 | {
1762 | "data": {
1763 | "text/plain": [
1764 | "{'name': 'Wes',\n",
1765 | " 'pet': None,\n",
1766 | " 'place_lived': ['United States', 'Spain', 'Germany'],\n",
1767 | " 'siblings': [{'age': 25, 'name': 'Scott', 'pet': 'Zuko'},\n",
1768 | " {'age': 25, 'name': 'Wei', 'pet': 'Cisco'}]}"
1769 | ]
1770 | },
1771 | "execution_count": 36,
1772 | "metadata": {},
1773 | "output_type": "execute_result"
1774 | }
1775 | ],
1776 | "source": [
1777 | "# 用 json.loads 可將JSON字串還原成 dict物件\n",
1778 | "import json\n",
1779 | "\n",
1780 | "result = json.loads(obj)\n",
1781 | "result"
1782 | ]
1783 | },
1784 | {
1785 | "cell_type": "code",
1786 | "execution_count": 37,
1787 | "metadata": {
1788 | "collapsed": false
1789 | },
1790 | "outputs": [
1791 | {
1792 | "data": {
1793 | "text/plain": [
1794 | "dict"
1795 | ]
1796 | },
1797 | "execution_count": 37,
1798 | "metadata": {},
1799 | "output_type": "execute_result"
1800 | }
1801 | ],
1802 | "source": [
1803 | "# JSON物件其實是 dict 物件\n",
1804 | "type(result)"
1805 | ]
1806 | },
1807 | {
1808 | "cell_type": "code",
1809 | "execution_count": 38,
1810 | "metadata": {
1811 | "collapsed": false
1812 | },
1813 | "outputs": [
1814 | {
1815 | "data": {
1816 | "text/plain": [
1817 | "int"
1818 | ]
1819 | },
1820 | "execution_count": 38,
1821 | "metadata": {},
1822 | "output_type": "execute_result"
1823 | }
1824 | ],
1825 | "source": [
1826 | "# 使用索引,可以探及 dict內部的資料\n",
1827 | "type(result['siblings'][0]['age'])"
1828 | ]
1829 | },
1830 | {
1831 | "cell_type": "code",
1832 | "execution_count": 39,
1833 | "metadata": {
1834 | "collapsed": false
1835 | },
1836 | "outputs": [
1837 | {
1838 | "data": {
1839 | "text/plain": [
1840 | "'{\"place_lived\": [\"United States\", \"Spain\", \"Germany\"], \"siblings\": [{\"pet\": \"Zuko\", \"age\": 25, \"name\": \"Scott\"}, {\"pet\": \"Cisco\", \"age\": 25, \"name\": \"Wei\"}], \"pet\": null, \"name\": \"Wes\"}'"
1841 | ]
1842 | },
1843 | "execution_count": 39,
1844 | "metadata": {},
1845 | "output_type": "execute_result"
1846 | }
1847 | ],
1848 | "source": [
1849 | "# json.dumps 可將dict物件轉換成 JSON字串\n",
1850 | "# json字串 和json物件 需區分清楚\n",
1851 | "# json物件 其實就是 dict\n",
1852 | "json.dumps(result)"
1853 | ]
1854 | },
1855 | {
1856 | "cell_type": "code",
1857 | "execution_count": 40,
1858 | "metadata": {
1859 | "collapsed": false
1860 | },
1861 | "outputs": [
1862 | {
1863 | "data": {
1864 | "text/plain": [
1865 | "[{'age': 25, 'name': 'Scott', 'pet': 'Zuko'},\n",
1866 | " {'age': 25, 'name': 'Wei', 'pet': 'Cisco'}]"
1867 | ]
1868 | },
1869 | "execution_count": 40,
1870 | "metadata": {},
1871 | "output_type": "execute_result"
1872 | }
1873 | ],
1874 | "source": [
1875 | "result['siblings']"
1876 | ]
1877 | },
1878 | {
1879 | "cell_type": "code",
1880 | "execution_count": 41,
1881 | "metadata": {
1882 | "collapsed": false
1883 | },
1884 | "outputs": [
1885 | {
1886 | "data": {
1887 | "text/html": [
1888 | "\n",
1889 | "
\n",
1890 | " \n",
1891 | " \n",
1892 | " | \n",
1893 | " 0 | \n",
1894 | " 1 | \n",
1895 | "
\n",
1896 | " \n",
1897 | " \n",
1898 | " \n",
1899 | " age | \n",
1900 | " 25 | \n",
1901 | " 25 | \n",
1902 | "
\n",
1903 | " \n",
1904 | " name | \n",
1905 | " Scott | \n",
1906 | " Wei | \n",
1907 | "
\n",
1908 | " \n",
1909 | " pet | \n",
1910 | " Zuko | \n",
1911 | " Cisco | \n",
1912 | "
\n",
1913 | " \n",
1914 | "
\n",
1915 | "
"
1916 | ],
1917 | "text/plain": [
1918 | " 0 1\n",
1919 | "age 25 25\n",
1920 | "name Scott Wei\n",
1921 | "pet Zuko Cisco"
1922 | ]
1923 | },
1924 | "execution_count": 41,
1925 | "metadata": {},
1926 | "output_type": "execute_result"
1927 | }
1928 | ],
1929 | "source": [
1930 | "# 以JSON物件建構DataFrame\n",
1931 | "df_siblings = DataFrame(result['siblings'], columns = ['age', 'name', 'pet']).T\n",
1932 | "df_siblings"
1933 | ]
1934 | },
1935 | {
1936 | "cell_type": "code",
1937 | "execution_count": 42,
1938 | "metadata": {
1939 | "collapsed": false
1940 | },
1941 | "outputs": [
1942 | {
1943 | "data": {
1944 | "text/plain": [
1945 | "'{\"0\":{\"age\":25,\"name\":\"Scott\",\"pet\":\"Zuko\"},\"1\":{\"age\":25,\"name\":\"Wei\",\"pet\":\"Cisco\"}}'"
1946 | ]
1947 | },
1948 | "execution_count": 42,
1949 | "metadata": {},
1950 | "output_type": "execute_result"
1951 | }
1952 | ],
1953 | "source": [
1954 | "# DataFrame有 to_json() 方法,可將DataFrame序列化\n",
1955 | "siblings_json_string = df_siblings.to_json()\n",
1956 | "siblings_json_string"
1957 | ]
1958 | },
1959 | {
1960 | "cell_type": "code",
1961 | "execution_count": 43,
1962 | "metadata": {
1963 | "collapsed": false
1964 | },
1965 | "outputs": [
1966 | {
1967 | "data": {
1968 | "text/plain": [
1969 | "{'0': {'age': 25, 'name': 'Scott', 'pet': 'Zuko'},\n",
1970 | " '1': {'age': 25, 'name': 'Wei', 'pet': 'Cisco'}}"
1971 | ]
1972 | },
1973 | "execution_count": 43,
1974 | "metadata": {},
1975 | "output_type": "execute_result"
1976 | }
1977 | ],
1978 | "source": [
1979 | "siblings_json = json.loads(siblings_json_string)\n",
1980 | "siblings_json"
1981 | ]
1982 | },
1983 | {
1984 | "cell_type": "code",
1985 | "execution_count": 44,
1986 | "metadata": {
1987 | "collapsed": false
1988 | },
1989 | "outputs": [
1990 | {
1991 | "data": {
1992 | "text/html": [
1993 | "\n",
1994 | "
\n",
1995 | " \n",
1996 | " \n",
1997 | " | \n",
1998 | " 0 | \n",
1999 | " 1 | \n",
2000 | "
\n",
2001 | " \n",
2002 | " \n",
2003 | " \n",
2004 | " age | \n",
2005 | " 25 | \n",
2006 | " 25 | \n",
2007 | "
\n",
2008 | " \n",
2009 | " name | \n",
2010 | " Scott | \n",
2011 | " Wei | \n",
2012 | "
\n",
2013 | " \n",
2014 | " pet | \n",
2015 | " Zuko | \n",
2016 | " Cisco | \n",
2017 | "
\n",
2018 | " \n",
2019 | "
\n",
2020 | "
"
2021 | ],
2022 | "text/plain": [
2023 | " 0 1\n",
2024 | "age 25 25\n",
2025 | "name Scott Wei\n",
2026 | "pet Zuko Cisco"
2027 | ]
2028 | },
2029 | "execution_count": 44,
2030 | "metadata": {},
2031 | "output_type": "execute_result"
2032 | }
2033 | ],
2034 | "source": [
2035 | "# DataFrame有 from_dict() 方法,可反序列化\n",
2036 | "df_siblings = DataFrame.from_dict(siblings_json)\n",
2037 | "df_siblings"
2038 | ]
2039 | },
2040 | {
2041 | "cell_type": "markdown",
2042 | "metadata": {},
2043 | "source": [
2044 | "## Web訊息收集"
2045 | ]
2046 | },
2047 | {
2048 | "cell_type": "markdown",
2049 | "metadata": {},
2050 | "source": [
2051 | "[Yahoo股票資料抓取](../%E7%B7%B4%E7%BF%92%20-%20%E8%82%A1%E7%A5%A8%E8%B3%87%E6%96%99%E5%BD%99%E6%95%B4_Yahoo%E8%82%A1%E5%B8%82%20-%20%E5%95%8F%E9%A1%8C.ipynb)"
2052 | ]
2053 | },
2054 | {
2055 | "cell_type": "markdown",
2056 | "metadata": {},
2057 | "source": [
2058 | "## 二進制數據格式"
2059 | ]
2060 | },
2061 | {
2062 | "cell_type": "code",
2063 | "execution_count": 45,
2064 | "metadata": {
2065 | "collapsed": false
2066 | },
2067 | "outputs": [
2068 | {
2069 | "data": {
2070 | "text/html": [
2071 | "\n",
2072 | "
\n",
2073 | " \n",
2074 | " \n",
2075 | " | \n",
2076 | " a | \n",
2077 | " b | \n",
2078 | " c | \n",
2079 | " d | \n",
2080 | " message | \n",
2081 | "
\n",
2082 | " \n",
2083 | " \n",
2084 | " \n",
2085 | " 0 | \n",
2086 | " 1 | \n",
2087 | " 2 | \n",
2088 | " 3 | \n",
2089 | " 4 | \n",
2090 | " hello | \n",
2091 | "
\n",
2092 | " \n",
2093 | " 1 | \n",
2094 | " 5 | \n",
2095 | " 6 | \n",
2096 | " 7 | \n",
2097 | " 8 | \n",
2098 | " world | \n",
2099 | "
\n",
2100 | " \n",
2101 | " 2 | \n",
2102 | " 9 | \n",
2103 | " 10 | \n",
2104 | " 11 | \n",
2105 | " 12 | \n",
2106 | " foo | \n",
2107 | "
\n",
2108 | " \n",
2109 | "
\n",
2110 | "
"
2111 | ],
2112 | "text/plain": [
2113 | " a b c d message\n",
2114 | "0 1 2 3 4 hello\n",
2115 | "1 5 6 7 8 world\n",
2116 | "2 9 10 11 12 foo"
2117 | ]
2118 | },
2119 | "execution_count": 45,
2120 | "metadata": {},
2121 | "output_type": "execute_result"
2122 | }
2123 | ],
2124 | "source": [
2125 | "# pandas物件都有一個 to_pickle方法(舊版名稱為 save),可以將物件數據以pickle的形式保存到硬碟\n",
2126 | "df = pd.read_csv('ex1.csv')\n",
2127 | "df"
2128 | ]
2129 | },
2130 | {
2131 | "cell_type": "code",
2132 | "execution_count": 46,
2133 | "metadata": {
2134 | "collapsed": false
2135 | },
2136 | "outputs": [
2137 | {
2138 | "data": {
2139 | "text/plain": [
2140 | "pandas.core.frame.DataFrame"
2141 | ]
2142 | },
2143 | "execution_count": 46,
2144 | "metadata": {},
2145 | "output_type": "execute_result"
2146 | }
2147 | ],
2148 | "source": [
2149 | "type(df)"
2150 | ]
2151 | },
2152 | {
2153 | "cell_type": "code",
2154 | "execution_count": 47,
2155 | "metadata": {
2156 | "collapsed": false
2157 | },
2158 | "outputs": [],
2159 | "source": [
2160 | "# 輸出 pickle資料到檔案\n",
2161 | "import pickle\n",
2162 | "df.to_pickle('ex1.pickle')\n",
2163 | "df = None\n",
2164 | "del df"
2165 | ]
2166 | },
2167 | {
2168 | "cell_type": "code",
2169 | "execution_count": 48,
2170 | "metadata": {
2171 | "collapsed": false
2172 | },
2173 | "outputs": [
2174 | {
2175 | "data": {
2176 | "text/html": [
2177 | "\n",
2178 | "
\n",
2179 | " \n",
2180 | " \n",
2181 | " | \n",
2182 | " a | \n",
2183 | " b | \n",
2184 | " c | \n",
2185 | " d | \n",
2186 | " message | \n",
2187 | "
\n",
2188 | " \n",
2189 | " \n",
2190 | " \n",
2191 | " 0 | \n",
2192 | " 1 | \n",
2193 | " 2 | \n",
2194 | " 3 | \n",
2195 | " 4 | \n",
2196 | " hello | \n",
2197 | "
\n",
2198 | " \n",
2199 | " 1 | \n",
2200 | " 5 | \n",
2201 | " 6 | \n",
2202 | " 7 | \n",
2203 | " 8 | \n",
2204 | " world | \n",
2205 | "
\n",
2206 | " \n",
2207 | " 2 | \n",
2208 | " 9 | \n",
2209 | " 10 | \n",
2210 | " 11 | \n",
2211 | " 12 | \n",
2212 | " foo | \n",
2213 | "
\n",
2214 | " \n",
2215 | "
\n",
2216 | "
"
2217 | ],
2218 | "text/plain": [
2219 | " a b c d message\n",
2220 | "0 1 2 3 4 hello\n",
2221 | "1 5 6 7 8 world\n",
2222 | "2 9 10 11 12 foo"
2223 | ]
2224 | },
2225 | "execution_count": 48,
2226 | "metadata": {},
2227 | "output_type": "execute_result"
2228 | }
2229 | ],
2230 | "source": [
2231 | "# 讀入 pickle檔案資料成為物件 \n",
2232 | "df = pickle.load(open('ex1.pickle', 'rb'))\n",
2233 | "df"
2234 | ]
2235 | },
2236 | {
2237 | "cell_type": "code",
2238 | "execution_count": 49,
2239 | "metadata": {
2240 | "collapsed": false
2241 | },
2242 | "outputs": [
2243 | {
2244 | "data": {
2245 | "text/plain": [
2246 | "pandas.core.frame.DataFrame"
2247 | ]
2248 | },
2249 | "execution_count": 49,
2250 | "metadata": {},
2251 | "output_type": "execute_result"
2252 | }
2253 | ],
2254 | "source": [
2255 | "type(df)"
2256 | ]
2257 | },
2258 | {
2259 | "cell_type": "markdown",
2260 | "metadata": {},
2261 | "source": [
2262 | "### 讀取 Microsoft Excel文件"
2263 | ]
2264 | },
2265 | {
2266 | "cell_type": "code",
2267 | "execution_count": 50,
2268 | "metadata": {
2269 | "collapsed": false
2270 | },
2271 | "outputs": [
2272 | {
2273 | "data": {
2274 | "text/html": [
2275 | "\n",
2276 | "
\n",
2277 | " \n",
2278 | " \n",
2279 | " | \n",
2280 | " 時間 | \n",
2281 | " 溫度 | \n",
2282 | " 濕度 | \n",
2283 | "
\n",
2284 | " \n",
2285 | " \n",
2286 | " \n",
2287 | " 0 | \n",
2288 | " 2016-02-01 10:35:00.000 | \n",
2289 | " 12 | \n",
2290 | " 40 | \n",
2291 | "
\n",
2292 | " \n",
2293 | " 1 | \n",
2294 | " 2016-02-01 10:36:00.000 | \n",
2295 | " 13 | \n",
2296 | " 41 | \n",
2297 | "
\n",
2298 | " \n",
2299 | " 2 | \n",
2300 | " 2016-02-01 10:36:59.995 | \n",
2301 | " 14 | \n",
2302 | " 42 | \n",
2303 | "
\n",
2304 | " \n",
2305 | "
\n",
2306 | "
"
2307 | ],
2308 | "text/plain": [
2309 | " 時間 溫度 濕度\n",
2310 | "0 2016-02-01 10:35:00.000 12 40\n",
2311 | "1 2016-02-01 10:36:00.000 13 41\n",
2312 | "2 2016-02-01 10:36:59.995 14 42"
2313 | ]
2314 | },
2315 | "execution_count": 50,
2316 | "metadata": {},
2317 | "output_type": "execute_result"
2318 | }
2319 | ],
2320 | "source": [
2321 | "# 使用 ExcelFile 類別\n",
2322 | "xls_file = pd.ExcelFile('test.xls', header = None)\n",
2323 | "table = xls_file.parse('Sheet1')\n",
2324 | "table"
2325 | ]
2326 | },
2327 | {
2328 | "cell_type": "code",
2329 | "execution_count": 51,
2330 | "metadata": {
2331 | "collapsed": false
2332 | },
2333 | "outputs": [
2334 | {
2335 | "data": {
2336 | "text/plain": [
2337 | "pandas.core.frame.DataFrame"
2338 | ]
2339 | },
2340 | "execution_count": 51,
2341 | "metadata": {},
2342 | "output_type": "execute_result"
2343 | }
2344 | ],
2345 | "source": [
2346 | "type(table)"
2347 | ]
2348 | },
2349 | {
2350 | "cell_type": "markdown",
2351 | "metadata": {},
2352 | "source": [
2353 | "## 使用數據庫"
2354 | ]
2355 | },
2356 | {
2357 | "cell_type": "code",
2358 | "execution_count": 52,
2359 | "metadata": {
2360 | "collapsed": false
2361 | },
2362 | "outputs": [
2363 | {
2364 | "data": {
2365 | "text/plain": [
2366 | "[('Atlanta', 'Georgia', 1.25, 6),\n",
2367 | " ('Tallahassee', 'Florida', 2.6, 3),\n",
2368 | " ('Sacramento', 'California', 1.7, 5)]"
2369 | ]
2370 | },
2371 | "execution_count": 52,
2372 | "metadata": {},
2373 | "output_type": "execute_result"
2374 | }
2375 | ],
2376 | "source": [
2377 | "# 使用 SQLite3\n",
2378 | "\n",
2379 | "import sqlite3\n",
2380 | "\n",
2381 | "# 連接資料庫\n",
2382 | "con = sqlite3.connect(':memory:')\n",
2383 | "\n",
2384 | "# 建立資料表\n",
2385 | "query = \"\"\"\n",
2386 | "CREATE TABLE test\n",
2387 | "(a VARCHAR(20), b VARCHAR(20), c REAL, d INTEGER);\n",
2388 | "\"\"\"\n",
2389 | "con.execute(query)\n",
2390 | "con.commit()\n",
2391 | "\n",
2392 | "# 插入資料\n",
2393 | "data = [('Atlanta', 'Georgia', 1.25, 6), \n",
2394 | " ('Tallahassee', 'Florida', 2.6, 3), \n",
2395 | " ('Sacramento', 'California', 1.7, 5)]\n",
2396 | "stmt = \"INSERT INTO test VALUES(?, ?, ?, ?)\"\n",
2397 | "con.executemany(stmt, data)\n",
2398 | "con.commit()\n",
2399 | "\n",
2400 | "\n",
2401 | "# 查詢資料\n",
2402 | "cursor = con.execute('select * from test')\n",
2403 | "rows = cursor.fetchall()\n",
2404 | "rows"
2405 | ]
2406 | },
2407 | {
2408 | "cell_type": "code",
2409 | "execution_count": 53,
2410 | "metadata": {
2411 | "collapsed": false
2412 | },
2413 | "outputs": [
2414 | {
2415 | "data": {
2416 | "text/plain": [
2417 | "(('a', None, None, None, None, None, None),\n",
2418 | " ('b', None, None, None, None, None, None),\n",
2419 | " ('c', None, None, None, None, None, None),\n",
2420 | " ('d', None, None, None, None, None, None))"
2421 | ]
2422 | },
2423 | "execution_count": 53,
2424 | "metadata": {},
2425 | "output_type": "execute_result"
2426 | }
2427 | ],
2428 | "source": [
2429 | "# cursor.description 包含 欄位資訊\n",
2430 | "cursor.description"
2431 | ]
2432 | },
2433 | {
2434 | "cell_type": "code",
2435 | "execution_count": 54,
2436 | "metadata": {
2437 | "collapsed": false
2438 | },
2439 | "outputs": [
2440 | {
2441 | "data": {
2442 | "text/html": [
2443 | "\n",
2444 | "
\n",
2445 | " \n",
2446 | " \n",
2447 | " | \n",
2448 | " a | \n",
2449 | " b | \n",
2450 | " c | \n",
2451 | " d | \n",
2452 | "
\n",
2453 | " \n",
2454 | " \n",
2455 | " \n",
2456 | " 0 | \n",
2457 | " Atlanta | \n",
2458 | " Georgia | \n",
2459 | " 1.25 | \n",
2460 | " 6 | \n",
2461 | "
\n",
2462 | " \n",
2463 | " 1 | \n",
2464 | " Tallahassee | \n",
2465 | " Florida | \n",
2466 | " 2.60 | \n",
2467 | " 3 | \n",
2468 | "
\n",
2469 | " \n",
2470 | " 2 | \n",
2471 | " Sacramento | \n",
2472 | " California | \n",
2473 | " 1.70 | \n",
2474 | " 5 | \n",
2475 | "
\n",
2476 | " \n",
2477 | "
\n",
2478 | "
"
2479 | ],
2480 | "text/plain": [
2481 | " a b c d\n",
2482 | "0 Atlanta Georgia 1.25 6\n",
2483 | "1 Tallahassee Florida 2.60 3\n",
2484 | "2 Sacramento California 1.70 5"
2485 | ]
2486 | },
2487 | "execution_count": 54,
2488 | "metadata": {},
2489 | "output_type": "execute_result"
2490 | }
2491 | ],
2492 | "source": [
2493 | "# 用資料庫的資料建立 DataFrame\n",
2494 | "df = DataFrame(rows, columns = [f[0] for f in cursor.description])\n",
2495 | "df"
2496 | ]
2497 | },
2498 | {
2499 | "cell_type": "code",
2500 | "execution_count": 55,
2501 | "metadata": {
2502 | "collapsed": false
2503 | },
2504 | "outputs": [
2505 | {
2506 | "data": {
2507 | "text/html": [
2508 | "\n",
2509 | "
\n",
2510 | " \n",
2511 | " \n",
2512 | " | \n",
2513 | " a | \n",
2514 | " b | \n",
2515 | " c | \n",
2516 | " d | \n",
2517 | "
\n",
2518 | " \n",
2519 | " \n",
2520 | " \n",
2521 | " 0 | \n",
2522 | " Atlanta | \n",
2523 | " Georgia | \n",
2524 | " 1.25 | \n",
2525 | " 6 | \n",
2526 | "
\n",
2527 | " \n",
2528 | " 1 | \n",
2529 | " Tallahassee | \n",
2530 | " Florida | \n",
2531 | " 2.60 | \n",
2532 | " 3 | \n",
2533 | "
\n",
2534 | " \n",
2535 | " 2 | \n",
2536 | " Sacramento | \n",
2537 | " California | \n",
2538 | " 1.70 | \n",
2539 | " 5 | \n",
2540 | "
\n",
2541 | " \n",
2542 | "
\n",
2543 | "
"
2544 | ],
2545 | "text/plain": [
2546 | " a b c d\n",
2547 | "0 Atlanta Georgia 1.25 6\n",
2548 | "1 Tallahassee Florida 2.60 3\n",
2549 | "2 Sacramento California 1.70 5"
2550 | ]
2551 | },
2552 | "execution_count": 55,
2553 | "metadata": {},
2554 | "output_type": "execute_result"
2555 | }
2556 | ],
2557 | "source": [
2558 | "# 使用 pandas.io.sql 來讀取資料庫資料並創建 DataFrame\n",
2559 | "import pandas.io.sql as sql\n",
2560 | "df = sql.read_sql('select * from test', con)\n",
2561 | "df"
2562 | ]
2563 | }
2564 | ],
2565 | "metadata": {
2566 | "anaconda-cloud": {},
2567 | "kernelspec": {
2568 | "display_name": "Python [default]",
2569 | "language": "python",
2570 | "name": "python3"
2571 | },
2572 | "language_info": {
2573 | "codemirror_mode": {
2574 | "name": "ipython",
2575 | "version": 3
2576 | },
2577 | "file_extension": ".py",
2578 | "mimetype": "text/x-python",
2579 | "name": "python",
2580 | "nbconvert_exporter": "python",
2581 | "pygments_lexer": "ipython3",
2582 | "version": "3.5.1"
2583 | }
2584 | },
2585 | "nbformat": 4,
2586 | "nbformat_minor": 0
2587 | }
2588 |
--------------------------------------------------------------------------------
/notebooks/2. Pandas - IO tools/ex1.csv:
--------------------------------------------------------------------------------
1 | a,b,c,d,message
2 | 1,2,3,4,hello
3 | 5,6,7,8,world
4 | 9,10,11,12,foo
--------------------------------------------------------------------------------
/notebooks/2. Pandas - IO tools/ex1.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wei1234c/Introduction_to_Pandas/af523d3dc81a1bf49a03740781b1a918ace2cfc9/notebooks/2. Pandas - IO tools/ex1.pickle
--------------------------------------------------------------------------------
/notebooks/2. Pandas - IO tools/ex2.csv:
--------------------------------------------------------------------------------
1 | 1,2,3,4,hello
2 | 5,6,7,8,world
3 | 9,10,11,12,foo
--------------------------------------------------------------------------------
/notebooks/2. Pandas - IO tools/ex3 - 1.csv:
--------------------------------------------------------------------------------
1 | A B C
2 | aaa -0.264 -1.026 -0.619
3 | bbb 0.927 0.302 -0.032
4 | ccc -0.265 -0.385 -0.217
5 |
--------------------------------------------------------------------------------
/notebooks/2. Pandas - IO tools/ex3.csv:
--------------------------------------------------------------------------------
1 | key1,key2,value1,value2
2 | one,a,1,2
3 | one,b,3,4
4 | one,c,5,6
5 | one,d,7,8
6 | two,a,9,10
7 | two,b,11,12
8 | two,c,13,14
9 | two,d,15,16
--------------------------------------------------------------------------------
/notebooks/2. Pandas - IO tools/ex4.csv:
--------------------------------------------------------------------------------
1 | # hey!
2 | a,b,c,d,message
3 | # just wanted to make things more difficult for you
4 | # who read CSV files with computers, anyway?
5 | 1,2,3,4,hello
6 | 5,6,7,8,world
7 | 9,10,11,12,foo
--------------------------------------------------------------------------------
/notebooks/2. Pandas - IO tools/ex5-1.csv:
--------------------------------------------------------------------------------
1 | something,a,b,c,d,message
2 | one,1,2,3.0,4,
3 | two,5,6,,8,world
4 | three,9,10,11.0,12,foo
5 |
--------------------------------------------------------------------------------
/notebooks/2. Pandas - IO tools/ex5.csv:
--------------------------------------------------------------------------------
1 | something,a,b,c,d,message
2 | one,1,2,3,4,NA
3 | two,5,6,,8,world
4 | three,9,10,11,12,foo
--------------------------------------------------------------------------------
/notebooks/2. Pandas - IO tools/ex6-o.csv:
--------------------------------------------------------------------------------
1 | obj = """
{
"name": "Wes",
"place_lived": ["United States", "Spain", "Germany"],
"pet": null,
"siblings": [{"name": "Scott", "age": 25, "pet": "Zuko"}, {"name": "Wei", "age": 25, "pet": "Cisco"}]
}
"""
--------------------------------------------------------------------------------
/notebooks/2. Pandas - IO tools/ex6.csv:
--------------------------------------------------------------------------------
1 | something;a;b;c;d;message
2 | one;1;2;3;4;NA
3 | two;5;6;;8;world
4 | three;9;10;11;12;foo
--------------------------------------------------------------------------------
/notebooks/2. Pandas - IO tools/mta.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | 373889
5 |
6 | Metro-North Railroad
7 | 12
8 |
9 |
--------------------------------------------------------------------------------
/notebooks/2. Pandas - IO tools/mydata.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wei1234c/Introduction_to_Pandas/af523d3dc81a1bf49a03740781b1a918ace2cfc9/notebooks/2. Pandas - IO tools/mydata.h5
--------------------------------------------------------------------------------
/notebooks/2. Pandas - IO tools/test.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wei1234c/Introduction_to_Pandas/af523d3dc81a1bf49a03740781b1a918ace2cfc9/notebooks/2. Pandas - IO tools/test.xls
--------------------------------------------------------------------------------
/notebooks/2. Pandas - IO tools/treseries.csv:
--------------------------------------------------------------------------------
1 | 2000-01-01,0
2 | 2000-01-02,1
3 | 2000-01-03,2
4 | 2000-01-04,3
5 | 2000-01-05,4
6 | 2000-01-06,5
7 | 2000-01-07,6
8 |
--------------------------------------------------------------------------------
/notebooks/練習 - 股票資料彙整_YahooFinance - 問題.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 練習 - 股票資料彙整_YahooFinance - 問題"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {
13 | "collapsed": false
14 | },
15 | "source": [
16 | "### 問題: \n",
17 | "使用 pandas_datareader.data.DataReader 抓取 2356.TW, 1566.TWO 最近一個月的股價資料 "
18 | ]
19 | },
20 | {
21 | "cell_type": "markdown",
22 | "metadata": {},
23 | "source": [
24 | "參考資料: \n",
25 | "http://pandas.pydata.org/pandas-docs/stable/remote_data.html \n",
26 | "http://www.predream.org/show-58-171-1.html \n",
27 | "http://stackoverflow.com/questions/22991567/pandas-yahoo-finance-datareader"
28 | ]
29 | }
30 | ],
31 | "metadata": {
32 | "kernelspec": {
33 | "display_name": "Python 3",
34 | "language": "python",
35 | "name": "python3"
36 | },
37 | "language_info": {
38 | "codemirror_mode": {
39 | "name": "ipython",
40 | "version": 3
41 | },
42 | "file_extension": ".py",
43 | "mimetype": "text/x-python",
44 | "name": "python",
45 | "nbconvert_exporter": "python",
46 | "pygments_lexer": "ipython3",
47 | "version": "3.5.1"
48 | }
49 | },
50 | "nbformat": 4,
51 | "nbformat_minor": 0
52 | }
53 |
--------------------------------------------------------------------------------
/notebooks/練習 - 股票資料彙整_YahooFinance - 解答.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 練習 - 股票資料彙整_YahooFinance - 解答\n",
8 | "http://pandas.pydata.org/pandas-docs/stable/remote_data.html \n",
9 | "http://www.predream.org/show-58-171-1.html \n",
10 | "http://stackoverflow.com/questions/22991567/pandas-yahoo-finance-datareader"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 1,
16 | "metadata": {
17 | "collapsed": false
18 | },
19 | "outputs": [],
20 | "source": [
21 | "import numpy as np\n",
22 | "import pandas as pd\n",
23 | "from pandas import Series, DataFrame\n",
24 | "import datetime\n",
25 | "import timeit"
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": 2,
31 | "metadata": {
32 | "collapsed": false
33 | },
34 | "outputs": [],
35 | "source": [
36 | "from pandas_datareader import data, wb\n",
37 | "import datetime \n",
38 | "\n",
39 | "def getWebData(name, \n",
40 | " start = datetime.date(1970, 1, 1), \n",
41 | " end = datetime.date.today(), \n",
42 | " data_source = 'yahoo', \n",
43 | " retry_count=3, \n",
44 | " pause=0.001):\n",
45 | " \n",
46 | " df = data.DataReader(name = name, \n",
47 | " data_source = data_source,\n",
48 | " start = start,\n",
49 | " end = end,\n",
50 | " retry_count = retry_count,\n",
51 | " pause = pause\n",
52 | " ) \n",
53 | "\n",
54 | " df = df.to_frame()\n",
55 | " df.index.names = ['Date', 'Name'] \n",
56 | " \n",
57 | " return df"
58 | ]
59 | },
60 | {
61 | "cell_type": "markdown",
62 | "metadata": {},
63 | "source": [
64 | "## Main"
65 | ]
66 | },
67 | {
68 | "cell_type": "code",
69 | "execution_count": 3,
70 | "metadata": {
71 | "collapsed": false
72 | },
73 | "outputs": [],
74 | "source": [
75 | "def fetchAndStoreStockData(stocks):\n",
76 | " \n",
77 | "# start = datetime.datetime(1965, 1, 1)\n",
78 | "# end = datetime.datetime(2013, 1, 1) \n",
79 | " df = getWebData(stocks) \n",
80 | "\n",
81 | " # Write to files __________________________ \n",
82 | " df.to_excel('Yahoo Finance{0}.xlsx'.format(''))"
83 | ]
84 | },
85 | {
86 | "cell_type": "code",
87 | "execution_count": 4,
88 | "metadata": {
89 | "collapsed": false
90 | },
91 | "outputs": [
92 | {
93 | "name": "stdout",
94 | "output_type": "stream",
95 | "text": [
96 | "Wall time: 5.46 s\n"
97 | ]
98 | }
99 | ],
100 | "source": [
101 | "if __name__ == '__main__':\n",
102 | " %time fetchAndStoreStockData(stocks = ['2356.TW', '1566.TWO'])"
103 | ]
104 | }
105 | ],
106 | "metadata": {
107 | "kernelspec": {
108 | "display_name": "Python [default]",
109 | "language": "python",
110 | "name": "python3"
111 | },
112 | "language_info": {
113 | "codemirror_mode": {
114 | "name": "ipython",
115 | "version": 3
116 | },
117 | "file_extension": ".py",
118 | "mimetype": "text/x-python",
119 | "name": "python",
120 | "nbconvert_exporter": "python",
121 | "pygments_lexer": "ipython3",
122 | "version": "3.5.1"
123 | }
124 | },
125 | "nbformat": 4,
126 | "nbformat_minor": 0
127 | }
128 |
--------------------------------------------------------------------------------
/notebooks/練習 - 股票資料彙整_Yahoo股市 - 問題.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 練習 - 股票資料彙整_Yahoo股市 - 問題"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {
14 | "collapsed": false
15 | },
16 | "outputs": [
17 | {
18 | "data": {
19 | "text/plain": [
20 | "'https://tw.stock.yahoo.com/s/list.php?c=tse&pid=1'"
21 | ]
22 | },
23 | "execution_count": 1,
24 | "metadata": {},
25 | "output_type": "execute_result"
26 | }
27 | ],
28 | "source": [
29 | "page = 1\n",
30 | "url = 'https://tw.stock.yahoo.com/s/list.php?c=tse&pid=' + str(page)\n",
31 | "url"
32 | ]
33 | },
34 | {
35 | "cell_type": "markdown",
36 | "metadata": {},
37 | "source": [
38 | "## 目標: \n",
39 | "- 使用 Pandas,抓取上述 url 網頁中的股價資料\n",
40 | "- 將股票代號與名稱區隔為不同的欄位\n",
41 | "- 將資料儲存為 Excel 檔案\n",
42 | "- 須注意各欄位的格式,數字欄位的儲存格式應該為數字\n",
43 | "- 重排欄位順序為:'市場別', '股票代號', '股票名稱', '日期', '時間', '成交', '買進', '賣出', '漲跌', '張數', '昨收', '開盤', '最高', '最低'\n",
44 | "- Extra:\n",
45 | " - 匯集 Yahoo 股市 page 1~ 5 的資料 (pd.concat)\n",
46 | " - 依據股票代號的前兩碼,做 GroupBy 操作\n",
47 | " - merge ../data/個股_類別.xls(先解壓縮 個股_類別.rar) 中的資料之後,做 GroupBy 操作 "
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": null,
53 | "metadata": {
54 | "collapsed": false
55 | },
56 | "outputs": [],
57 | "source": []
58 | }
59 | ],
60 | "metadata": {
61 | "anaconda-cloud": {},
62 | "kernelspec": {
63 | "display_name": "Python [default]",
64 | "language": "python",
65 | "name": "python3"
66 | },
67 | "language_info": {
68 | "codemirror_mode": {
69 | "name": "ipython",
70 | "version": 3
71 | },
72 | "file_extension": ".py",
73 | "mimetype": "text/x-python",
74 | "name": "python",
75 | "nbconvert_exporter": "python",
76 | "pygments_lexer": "ipython3",
77 | "version": "3.5.1"
78 | }
79 | },
80 | "nbformat": 4,
81 | "nbformat_minor": 0
82 | }
83 |
--------------------------------------------------------------------------------
/notebooks/練習 - 股票資料彙整_Yahoo股市 - 解答.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 練習 - 股票資料彙整_Yahoo股市 - 解答"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {
14 | "collapsed": true
15 | },
16 | "outputs": [],
17 | "source": [
18 | "import numpy as np\n",
19 | "import pandas as pd\n",
20 | "from pandas import Series, DataFrame"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 2,
26 | "metadata": {
27 | "collapsed": true
28 | },
29 | "outputs": [],
30 | "source": [
31 | "import datetime"
32 | ]
33 | },
34 | {
35 | "cell_type": "markdown",
36 | "metadata": {},
37 | "source": [
38 | "目標資料來源: \n",
39 | "https://tw.stock.yahoo.com/s/list.php?c=tse&pid=1"
40 | ]
41 | },
42 | {
43 | "cell_type": "markdown",
44 | "metadata": {},
45 | "source": [
46 | "## 抓取網頁資料"
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": 3,
52 | "metadata": {
53 | "collapsed": true
54 | },
55 | "outputs": [],
56 | "source": [
57 | "import requests\n",
58 | "\n",
59 | "def get_yahoo_page_html(url): \n",
60 | " html = requests.get(url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36'})\n",
61 | " return html.text"
62 | ]
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": 4,
67 | "metadata": {
68 | "collapsed": true
69 | },
70 | "outputs": [],
71 | "source": [
72 | "def getDataOnePage(html):\n",
73 | " targetTableIndex = 0\n",
74 | " table = pd.read_html(html,\n",
75 | " attrs = {'border': '1' , \n",
76 | " 'cellspacing': '0', \n",
77 | " 'cellpadding': '2', \n",
78 | " 'bgcolor': '#ffffff'},\n",
79 | " header = 0\n",
80 | " )[targetTableIndex]\n",
81 | " \n",
82 | " return table"
83 | ]
84 | },
85 | {
86 | "cell_type": "code",
87 | "execution_count": 5,
88 | "metadata": {
89 | "collapsed": false
90 | },
91 | "outputs": [],
92 | "source": [
93 | "def getDataOnePageTSE(page):\n",
94 | " url = 'https://tw.stock.yahoo.com/s/list.php?c=tse&pid=' + str(page) \n",
95 | " return getDataOnePage(html = get_yahoo_page_html(url))"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": 6,
101 | "metadata": {
102 | "collapsed": false
103 | },
104 | "outputs": [],
105 | "source": [
106 | "# 抓第一頁的資料\n",
107 | "df = getDataOnePageTSE(1)"
108 | ]
109 | },
110 | {
111 | "cell_type": "code",
112 | "execution_count": 7,
113 | "metadata": {
114 | "collapsed": false
115 | },
116 | "outputs": [
117 | {
118 | "data": {
119 | "text/html": [
120 | "\n",
121 | "
\n",
122 | " \n",
123 | " \n",
124 | " | \n",
125 | " 選擇 | \n",
126 | " 股票代號 | \n",
127 | " 時間 | \n",
128 | " 成交 | \n",
129 | " 買進 | \n",
130 | " 賣出 | \n",
131 | " 漲跌 | \n",
132 | " 張數 | \n",
133 | " 昨收 | \n",
134 | " 開盤 | \n",
135 | " 最高 | \n",
136 | " 最低 | \n",
137 | " 凱基證券下單 | \n",
138 | "
\n",
139 | " \n",
140 | " \n",
141 | " \n",
142 | " 195 | \n",
143 | " NaN | \n",
144 | " 4763 材料-KY | \n",
145 | " 14:30 | \n",
146 | " 121.50 | \n",
147 | " 121.5 | \n",
148 | " 122.0 | \n",
149 | " 0.00 | \n",
150 | " 994 | \n",
151 | " 121.50 | \n",
152 | " 121.0 | \n",
153 | " 128.00 | \n",
154 | " 120.50 | \n",
155 | " 買 賣 張 零股交易 | \n",
156 | "
\n",
157 | " \n",
158 | " 196 | \n",
159 | " NaN | \n",
160 | " 1598 岱宇 | \n",
161 | " 13:30 | \n",
162 | " 46.05 | \n",
163 | " 46.00 | \n",
164 | " 46.05 | \n",
165 | " ▽0.50 | \n",
166 | " 252 | \n",
167 | " 46.55 | \n",
168 | " 46.50 | \n",
169 | " 46.95 | \n",
170 | " 46.00 | \n",
171 | " 買 賣 張 零股交易 | \n",
172 | "
\n",
173 | " \n",
174 | " 197 | \n",
175 | " NaN | \n",
176 | " 1701 中化 | \n",
177 | " 13:30 | \n",
178 | " 18.10 | \n",
179 | " 18.10 | \n",
180 | " 18.15 | \n",
181 | " ▽0.05 | \n",
182 | " 218 | \n",
183 | " 18.15 | \n",
184 | " 18.20 | \n",
185 | " 18.20 | \n",
186 | " 18.05 | \n",
187 | " 買 賣 張 零股交易 | \n",
188 | "
\n",
189 | " \n",
190 | " 198 | \n",
191 | " NaN | \n",
192 | " 1707 葡萄王 | \n",
193 | " 13:30 | \n",
194 | " 261.00 | \n",
195 | " 261.0 | \n",
196 | " 261.5 | \n",
197 | " △3.5 | \n",
198 | " 537 | \n",
199 | " 257.50 | \n",
200 | " 261.0 | \n",
201 | " 262.50 | \n",
202 | " 258.00 | \n",
203 | " 買 賣 張 零股交易 | \n",
204 | "
\n",
205 | " \n",
206 | " 199 | \n",
207 | " NaN | \n",
208 | " 1720 生達 | \n",
209 | " 13:30 | \n",
210 | " 33.45 | \n",
211 | " 33.40 | \n",
212 | " 33.45 | \n",
213 | " 0.00 | \n",
214 | " 179 | \n",
215 | " 33.45 | \n",
216 | " 33.60 | \n",
217 | " 33.60 | \n",
218 | " 33.30 | \n",
219 | " 買 賣 張 零股交易 | \n",
220 | "
\n",
221 | " \n",
222 | "
\n",
223 | "
"
224 | ],
225 | "text/plain": [
226 | " 選擇 股票代號 時間 成交 買進 賣出 漲跌 張數 昨收 開盤 \\\n",
227 | "195 NaN 4763 材料-KY 14:30 121.50 121.5 122.0 0.00 994 121.50 121.0 \n",
228 | "196 NaN 1598 岱宇 13:30 46.05 46.00 46.05 ▽0.50 252 46.55 46.50 \n",
229 | "197 NaN 1701 中化 13:30 18.10 18.10 18.15 ▽0.05 218 18.15 18.20 \n",
230 | "198 NaN 1707 葡萄王 13:30 261.00 261.0 261.5 △3.5 537 257.50 261.0 \n",
231 | "199 NaN 1720 生達 13:30 33.45 33.40 33.45 0.00 179 33.45 33.60 \n",
232 | "\n",
233 | " 最高 最低 凱基證券下單 \n",
234 | "195 128.00 120.50 買 賣 張 零股交易 \n",
235 | "196 46.95 46.00 買 賣 張 零股交易 \n",
236 | "197 18.20 18.05 買 賣 張 零股交易 \n",
237 | "198 262.50 258.00 買 賣 張 零股交易 \n",
238 | "199 33.60 33.30 買 賣 張 零股交易 "
239 | ]
240 | },
241 | "execution_count": 7,
242 | "metadata": {},
243 | "output_type": "execute_result"
244 | }
245 | ],
246 | "source": [
247 | "df.tail()"
248 | ]
249 | },
250 | {
251 | "cell_type": "code",
252 | "execution_count": 8,
253 | "metadata": {
254 | "collapsed": false
255 | },
256 | "outputs": [],
257 | "source": [
258 | "df.to_excel('stock.xlsx')"
259 | ]
260 | },
261 | {
262 | "cell_type": "markdown",
263 | "metadata": {},
264 | "source": [
265 | "## 修整 DataFrame中的資料"
266 | ]
267 | },
268 | {
269 | "cell_type": "code",
270 | "execution_count": 9,
271 | "metadata": {
272 | "collapsed": false
273 | },
274 | "outputs": [],
275 | "source": [
276 | "def fixTable(marketType, table, theDate = datetime.date.today()):\n",
277 | " \n",
278 | " fixedTable = table\n",
279 | " \n",
280 | " # Drop\n",
281 | " fixedTable.drop(['選擇', '凱基證券下單'], axis = 1, inplace = True)\n",
282 | " fixedTable.dropna(axis=0, how='all', inplace=True)\n",
283 | "\n",
284 | " # fill missing data\n",
285 | " fixedTable['股票代號名稱'] = fixedTable['股票代號']\n",
286 | " fixedTable['股票代號'] = fixedTable['股票代號名稱'].map(lambda x: x.split()[0])\n",
287 | " fixedTable['股票名稱'] = fixedTable['股票代號名稱'].map(lambda x: x.split()[1])\n",
288 | " fixedTable['日期'] = theDate\n",
289 | " fixedTable['市場別'] = marketType\n",
290 | " \n",
291 | " # data type\n",
292 | " fixedTable.replace('-', np.nan, inplace = True) \n",
293 | " \n",
294 | " fixedTable['股票代號'] = fixedTable['股票代號'].astype(str)\n",
295 | " fixedTable['時間'] = fixedTable['時間'].astype(datetime.time) \n",
296 | " fixedTable[['成交', '買進', '賣出', '張數', '昨收', '開盤', '最高', '最低']] = \\\n",
297 | " fixedTable[['成交', '買進', '賣出', '張數', '昨收', '開盤', '最高', '最低']].astype(float) \n",
298 | " \n",
299 | " fixedTable['漲跌'] = fixedTable['成交'] - fixedTable['昨收']\n",
300 | " fixedTable['漲跌'] = fixedTable['漲跌'].map(lambda x: round(x, 2))\n",
301 | " \n",
302 | " # sort\n",
303 | "# fixedTable.sort_values(by = '股票代號', inplace = True) \n",
304 | " \n",
305 | " # indexing\n",
306 | " fixedTable.index = Series(range(len(fixedTable)))\n",
307 | " fixedTable.index.name = '項次'\n",
308 | " fixedTable = fixedTable.reindex(columns = ['市場別', '股票代號', '股票名稱', '日期', '時間', '成交', '買進', '賣出', '漲跌', '張數', '昨收', '開盤', '最高', '最低'])\n",
309 | " \n",
310 | " return fixedTable"
311 | ]
312 | },
313 | {
314 | "cell_type": "code",
315 | "execution_count": 10,
316 | "metadata": {
317 | "collapsed": false
318 | },
319 | "outputs": [],
320 | "source": [
321 | "df1 = fixTable('TSE', df)"
322 | ]
323 | },
324 | {
325 | "cell_type": "code",
326 | "execution_count": 11,
327 | "metadata": {
328 | "collapsed": false
329 | },
330 | "outputs": [
331 | {
332 | "data": {
333 | "text/html": [
334 | "\n",
335 | "
\n",
336 | " \n",
337 | " \n",
338 | " | \n",
339 | " 市場別 | \n",
340 | " 股票代號 | \n",
341 | " 股票名稱 | \n",
342 | " 日期 | \n",
343 | " 時間 | \n",
344 | " 成交 | \n",
345 | " 買進 | \n",
346 | " 賣出 | \n",
347 | " 漲跌 | \n",
348 | " 張數 | \n",
349 | " 昨收 | \n",
350 | " 開盤 | \n",
351 | " 最高 | \n",
352 | " 最低 | \n",
353 | "
\n",
354 | " \n",
355 | " 項次 | \n",
356 | " | \n",
357 | " | \n",
358 | " | \n",
359 | " | \n",
360 | " | \n",
361 | " | \n",
362 | " | \n",
363 | " | \n",
364 | " | \n",
365 | " | \n",
366 | " | \n",
367 | " | \n",
368 | " | \n",
369 | " | \n",
370 | "
\n",
371 | " \n",
372 | " \n",
373 | " \n",
374 | " 195 | \n",
375 | " TSE | \n",
376 | " 4763 | \n",
377 | " 材料-KY | \n",
378 | " 2016-10-04 | \n",
379 | " 14:30 | \n",
380 | " 121.50 | \n",
381 | " 121.5 | \n",
382 | " 122.00 | \n",
383 | " 0.00 | \n",
384 | " 994.0 | \n",
385 | " 121.50 | \n",
386 | " 121.0 | \n",
387 | " 128.00 | \n",
388 | " 120.50 | \n",
389 | "
\n",
390 | " \n",
391 | " 196 | \n",
392 | " TSE | \n",
393 | " 1598 | \n",
394 | " 岱宇 | \n",
395 | " 2016-10-04 | \n",
396 | " 13:30 | \n",
397 | " 46.05 | \n",
398 | " 46.0 | \n",
399 | " 46.05 | \n",
400 | " -0.50 | \n",
401 | " 252.0 | \n",
402 | " 46.55 | \n",
403 | " 46.5 | \n",
404 | " 46.95 | \n",
405 | " 46.00 | \n",
406 | "
\n",
407 | " \n",
408 | " 197 | \n",
409 | " TSE | \n",
410 | " 1701 | \n",
411 | " 中化 | \n",
412 | " 2016-10-04 | \n",
413 | " 13:30 | \n",
414 | " 18.10 | \n",
415 | " 18.1 | \n",
416 | " 18.15 | \n",
417 | " -0.05 | \n",
418 | " 218.0 | \n",
419 | " 18.15 | \n",
420 | " 18.2 | \n",
421 | " 18.20 | \n",
422 | " 18.05 | \n",
423 | "
\n",
424 | " \n",
425 | " 198 | \n",
426 | " TSE | \n",
427 | " 1707 | \n",
428 | " 葡萄王 | \n",
429 | " 2016-10-04 | \n",
430 | " 13:30 | \n",
431 | " 261.00 | \n",
432 | " 261.0 | \n",
433 | " 261.50 | \n",
434 | " 3.50 | \n",
435 | " 537.0 | \n",
436 | " 257.50 | \n",
437 | " 261.0 | \n",
438 | " 262.50 | \n",
439 | " 258.00 | \n",
440 | "
\n",
441 | " \n",
442 | " 199 | \n",
443 | " TSE | \n",
444 | " 1720 | \n",
445 | " 生達 | \n",
446 | " 2016-10-04 | \n",
447 | " 13:30 | \n",
448 | " 33.45 | \n",
449 | " 33.4 | \n",
450 | " 33.45 | \n",
451 | " 0.00 | \n",
452 | " 179.0 | \n",
453 | " 33.45 | \n",
454 | " 33.6 | \n",
455 | " 33.60 | \n",
456 | " 33.30 | \n",
457 | "
\n",
458 | " \n",
459 | "
\n",
460 | "
"
461 | ],
462 | "text/plain": [
463 | " 市場別 股票代號 股票名稱 日期 時間 成交 買進 賣出 漲跌 張數 \\\n",
464 | "項次 \n",
465 | "195 TSE 4763 材料-KY 2016-10-04 14:30 121.50 121.5 122.00 0.00 994.0 \n",
466 | "196 TSE 1598 岱宇 2016-10-04 13:30 46.05 46.0 46.05 -0.50 252.0 \n",
467 | "197 TSE 1701 中化 2016-10-04 13:30 18.10 18.1 18.15 -0.05 218.0 \n",
468 | "198 TSE 1707 葡萄王 2016-10-04 13:30 261.00 261.0 261.50 3.50 537.0 \n",
469 | "199 TSE 1720 生達 2016-10-04 13:30 33.45 33.4 33.45 0.00 179.0 \n",
470 | "\n",
471 | " 昨收 開盤 最高 最低 \n",
472 | "項次 \n",
473 | "195 121.50 121.0 128.00 120.50 \n",
474 | "196 46.55 46.5 46.95 46.00 \n",
475 | "197 18.15 18.2 18.20 18.05 \n",
476 | "198 257.50 261.0 262.50 258.00 \n",
477 | "199 33.45 33.6 33.60 33.30 "
478 | ]
479 | },
480 | "execution_count": 11,
481 | "metadata": {},
482 | "output_type": "execute_result"
483 | }
484 | ],
485 | "source": [
486 | "df1.tail(5)"
487 | ]
488 | },
489 | {
490 | "cell_type": "markdown",
491 | "metadata": {
492 | "collapsed": true
493 | },
494 | "source": [
495 | "## 彙整 Yahoo 股市 page 1~ 5 的資料"
496 | ]
497 | },
498 | {
499 | "cell_type": "code",
500 | "execution_count": 12,
501 | "metadata": {
502 | "collapsed": true
503 | },
504 | "outputs": [],
505 | "source": [
506 | "# 抓第一頁~第五頁的資料\n",
507 | "dfs = map(lambda p: fixTable('TSE', getDataOnePageTSE(p)) , range(1, 6))"
508 | ]
509 | },
510 | {
511 | "cell_type": "code",
512 | "execution_count": 13,
513 | "metadata": {
514 | "collapsed": false
515 | },
516 | "outputs": [
517 | {
518 | "data": {
519 | "text/plain": [
520 | "1000"
521 | ]
522 | },
523 | "execution_count": 13,
524 | "metadata": {},
525 | "output_type": "execute_result"
526 | }
527 | ],
528 | "source": [
529 | "# Append 在一起\n",
530 | "df = pd.concat(dfs)\n",
531 | "len(df)"
532 | ]
533 | },
534 | {
535 | "cell_type": "code",
536 | "execution_count": 14,
537 | "metadata": {
538 | "collapsed": false
539 | },
540 | "outputs": [
541 | {
542 | "data": {
543 | "text/html": [
544 | "\n",
545 | "
\n",
546 | " \n",
547 | " \n",
548 | " | \n",
549 | " 市場別 | \n",
550 | " 股票代號 | \n",
551 | " 股票名稱 | \n",
552 | " 日期 | \n",
553 | " 時間 | \n",
554 | " 成交 | \n",
555 | " 買進 | \n",
556 | " 賣出 | \n",
557 | " 漲跌 | \n",
558 | " 張數 | \n",
559 | " 昨收 | \n",
560 | " 開盤 | \n",
561 | " 最高 | \n",
562 | " 最低 | \n",
563 | "
\n",
564 | " \n",
565 | " \n",
566 | " \n",
567 | " 902 | \n",
568 | " TSE | \n",
569 | " 9941 | \n",
570 | " 裕融 | \n",
571 | " 2016-10-04 | \n",
572 | " 13:30 | \n",
573 | " 71.4 | \n",
574 | " 71.3 | \n",
575 | " 71.40 | \n",
576 | " 0.1 | \n",
577 | " 171.0 | \n",
578 | " 71.3 | \n",
579 | " 71.3 | \n",
580 | " 71.4 | \n",
581 | " 71.2 | \n",
582 | "
\n",
583 | " \n",
584 | " 903 | \n",
585 | " TSE | \n",
586 | " 9942 | \n",
587 | " 茂順 | \n",
588 | " 2016-10-04 | \n",
589 | " 13:18 | \n",
590 | " 86.9 | \n",
591 | " 85.9 | \n",
592 | " 86.40 | \n",
593 | " 1.6 | \n",
594 | " 37.0 | \n",
595 | " 85.3 | \n",
596 | " 85.8 | \n",
597 | " 86.9 | \n",
598 | " 85.2 | \n",
599 | "
\n",
600 | " \n",
601 | " 904 | \n",
602 | " TSE | \n",
603 | " 9944 | \n",
604 | " 新麗 | \n",
605 | " 2016-10-04 | \n",
606 | " 13:30 | \n",
607 | " 24.2 | \n",
608 | " 24.2 | \n",
609 | " 24.50 | \n",
610 | " -0.3 | \n",
611 | " 110.0 | \n",
612 | " 24.5 | \n",
613 | " 24.5 | \n",
614 | " 24.6 | \n",
615 | " 24.2 | \n",
616 | "
\n",
617 | " \n",
618 | " 905 | \n",
619 | " TSE | \n",
620 | " 9945 | \n",
621 | " 潤泰新 | \n",
622 | " 2016-10-04 | \n",
623 | " 14:30 | \n",
624 | " 37.7 | \n",
625 | " 37.7 | \n",
626 | " 37.75 | \n",
627 | " -1.1 | \n",
628 | " 7229.0 | \n",
629 | " 38.8 | \n",
630 | " 38.6 | \n",
631 | " 38.6 | \n",
632 | " 37.6 | \n",
633 | "
\n",
634 | " \n",
635 | " 906 | \n",
636 | " TSE | \n",
637 | " 9955 | \n",
638 | " 佳龍 | \n",
639 | " 2016-10-04 | \n",
640 | " 13:30 | \n",
641 | " 17.6 | \n",
642 | " 17.6 | \n",
643 | " 17.65 | \n",
644 | " 0.0 | \n",
645 | " 73.0 | \n",
646 | " 17.6 | \n",
647 | " 17.8 | \n",
648 | " 17.9 | \n",
649 | " 17.4 | \n",
650 | "
\n",
651 | " \n",
652 | "
\n",
653 | "
"
654 | ],
655 | "text/plain": [
656 | " 市場別 股票代號 股票名稱 日期 時間 成交 買進 賣出 漲跌 張數 昨收 \\\n",
657 | "902 TSE 9941 裕融 2016-10-04 13:30 71.4 71.3 71.40 0.1 171.0 71.3 \n",
658 | "903 TSE 9942 茂順 2016-10-04 13:18 86.9 85.9 86.40 1.6 37.0 85.3 \n",
659 | "904 TSE 9944 新麗 2016-10-04 13:30 24.2 24.2 24.50 -0.3 110.0 24.5 \n",
660 | "905 TSE 9945 潤泰新 2016-10-04 14:30 37.7 37.7 37.75 -1.1 7229.0 38.8 \n",
661 | "906 TSE 9955 佳龍 2016-10-04 13:30 17.6 17.6 17.65 0.0 73.0 17.6 \n",
662 | "\n",
663 | " 開盤 最高 最低 \n",
664 | "902 71.3 71.4 71.2 \n",
665 | "903 85.8 86.9 85.2 \n",
666 | "904 24.5 24.6 24.2 \n",
667 | "905 38.6 38.6 37.6 \n",
668 | "906 17.8 17.9 17.4 "
669 | ]
670 | },
671 | "execution_count": 14,
672 | "metadata": {},
673 | "output_type": "execute_result"
674 | }
675 | ],
676 | "source": [
677 | "df.index = pd.Index(range(len(df))) # 重新編排 row index 編號\n",
678 | "df = df[df['股票代號'].str.len() <= 4] # 濾除 權證 資料\n",
679 | "df.tail()"
680 | ]
681 | },
682 | {
683 | "cell_type": "markdown",
684 | "metadata": {},
685 | "source": [
686 | "## 抓取 類股 資料"
687 | ]
688 | },
689 | {
690 | "cell_type": "code",
691 | "execution_count": 15,
692 | "metadata": {
693 | "collapsed": false
694 | },
695 | "outputs": [
696 | {
697 | "data": {
698 | "text/html": [
699 | "\n",
700 | "
\n",
701 | " \n",
702 | " \n",
703 | " | \n",
704 | " 市場別_ID | \n",
705 | " 類股別_ID | \n",
706 | " 個股_代號 | \n",
707 | " 個股_名稱 | \n",
708 | " 類股_名稱 | \n",
709 | "
\n",
710 | " \n",
711 | " \n",
712 | " \n",
713 | " 29345 | \n",
714 | " 2 | \n",
715 | " 72 | \n",
716 | " 72861P | \n",
717 | " 國泰RG | \n",
718 | " 櫃認售 | \n",
719 | "
\n",
720 | " \n",
721 | " 29346 | \n",
722 | " 2 | \n",
723 | " 72 | \n",
724 | " 72863P | \n",
725 | " 國泰RJ | \n",
726 | " 櫃認售 | \n",
727 | "
\n",
728 | " \n",
729 | " 29347 | \n",
730 | " 2 | \n",
731 | " 72 | \n",
732 | " 72895P | \n",
733 | " 元大P3 | \n",
734 | " 櫃認售 | \n",
735 | "
\n",
736 | " \n",
737 | " 29348 | \n",
738 | " 2 | \n",
739 | " 72 | \n",
740 | " 72901P | \n",
741 | " 工銀QM | \n",
742 | " 櫃認售 | \n",
743 | "
\n",
744 | " \n",
745 | " 29349 | \n",
746 | " 2 | \n",
747 | " 72 | \n",
748 | " 72953P | \n",
749 | " 日盛QW | \n",
750 | " 櫃認售 | \n",
751 | "
\n",
752 | " \n",
753 | "
\n",
754 | "
"
755 | ],
756 | "text/plain": [
757 | " 市場別_ID 類股別_ID 個股_代號 個股_名稱 類股_名稱\n",
758 | "29345 2 72 72861P 國泰RG 櫃認售\n",
759 | "29346 2 72 72863P 國泰RJ 櫃認售\n",
760 | "29347 2 72 72895P 元大P3 櫃認售\n",
761 | "29348 2 72 72901P 工銀QM 櫃認售\n",
762 | "29349 2 72 72953P 日盛QW 櫃認售"
763 | ]
764 | },
765 | "execution_count": 15,
766 | "metadata": {},
767 | "output_type": "execute_result"
768 | }
769 | ],
770 | "source": [
771 | "df_類股 = pd.read_excel('..\\\\data\\個股_類別.xls') # 需先解壓縮 個股_類別.rar\n",
772 | "df_類股.tail()"
773 | ]
774 | },
775 | {
776 | "cell_type": "markdown",
777 | "metadata": {},
778 | "source": [
779 | "## Merge"
780 | ]
781 | },
782 | {
783 | "cell_type": "code",
784 | "execution_count": 16,
785 | "metadata": {
786 | "collapsed": false
787 | },
788 | "outputs": [
789 | {
790 | "data": {
791 | "text/html": [
792 | "\n",
793 | "
\n",
794 | " \n",
795 | " \n",
796 | " | \n",
797 | " 市場別 | \n",
798 | " 股票代號 | \n",
799 | " 股票名稱 | \n",
800 | " 日期 | \n",
801 | " 時間 | \n",
802 | " 成交 | \n",
803 | " 買進 | \n",
804 | " 賣出 | \n",
805 | " 漲跌 | \n",
806 | " 張數 | \n",
807 | " 昨收 | \n",
808 | " 開盤 | \n",
809 | " 最高 | \n",
810 | " 最低 | \n",
811 | " 類股別_ID | \n",
812 | " 類股_名稱 | \n",
813 | "
\n",
814 | " \n",
815 | " \n",
816 | " \n",
817 | " 885 | \n",
818 | " TSE | \n",
819 | " 9941 | \n",
820 | " 裕融 | \n",
821 | " 2016-10-04 | \n",
822 | " 13:30 | \n",
823 | " 71.4 | \n",
824 | " 71.3 | \n",
825 | " 71.40 | \n",
826 | " 0.1 | \n",
827 | " 171.0 | \n",
828 | " 71.3 | \n",
829 | " 71.3 | \n",
830 | " 71.4 | \n",
831 | " 71.2 | \n",
832 | " 6.0 | \n",
833 | " 其他 | \n",
834 | "
\n",
835 | " \n",
836 | " 886 | \n",
837 | " TSE | \n",
838 | " 9942 | \n",
839 | " 茂順 | \n",
840 | " 2016-10-04 | \n",
841 | " 13:18 | \n",
842 | " 86.9 | \n",
843 | " 85.9 | \n",
844 | " 86.40 | \n",
845 | " 1.6 | \n",
846 | " 37.0 | \n",
847 | " 85.3 | \n",
848 | " 85.8 | \n",
849 | " 86.9 | \n",
850 | " 85.2 | \n",
851 | " 6.0 | \n",
852 | " 其他 | \n",
853 | "
\n",
854 | " \n",
855 | " 887 | \n",
856 | " TSE | \n",
857 | " 9944 | \n",
858 | " 新麗 | \n",
859 | " 2016-10-04 | \n",
860 | " 13:30 | \n",
861 | " 24.2 | \n",
862 | " 24.2 | \n",
863 | " 24.50 | \n",
864 | " -0.3 | \n",
865 | " 110.0 | \n",
866 | " 24.5 | \n",
867 | " 24.5 | \n",
868 | " 24.6 | \n",
869 | " 24.2 | \n",
870 | " 6.0 | \n",
871 | " 其他 | \n",
872 | "
\n",
873 | " \n",
874 | " 888 | \n",
875 | " TSE | \n",
876 | " 9945 | \n",
877 | " 潤泰新 | \n",
878 | " 2016-10-04 | \n",
879 | " 14:30 | \n",
880 | " 37.7 | \n",
881 | " 37.7 | \n",
882 | " 37.75 | \n",
883 | " -1.1 | \n",
884 | " 7229.0 | \n",
885 | " 38.8 | \n",
886 | " 38.6 | \n",
887 | " 38.6 | \n",
888 | " 37.6 | \n",
889 | " 6.0 | \n",
890 | " 其他 | \n",
891 | "
\n",
892 | " \n",
893 | " 889 | \n",
894 | " TSE | \n",
895 | " 9955 | \n",
896 | " 佳龍 | \n",
897 | " 2016-10-04 | \n",
898 | " 13:30 | \n",
899 | " 17.6 | \n",
900 | " 17.6 | \n",
901 | " 17.65 | \n",
902 | " 0.0 | \n",
903 | " 73.0 | \n",
904 | " 17.6 | \n",
905 | " 17.8 | \n",
906 | " 17.9 | \n",
907 | " 17.4 | \n",
908 | " 6.0 | \n",
909 | " 其他 | \n",
910 | "
\n",
911 | " \n",
912 | "
\n",
913 | "
"
914 | ],
915 | "text/plain": [
916 | " 市場別 股票代號 股票名稱 日期 時間 成交 買進 賣出 漲跌 張數 昨收 \\\n",
917 | "885 TSE 9941 裕融 2016-10-04 13:30 71.4 71.3 71.40 0.1 171.0 71.3 \n",
918 | "886 TSE 9942 茂順 2016-10-04 13:18 86.9 85.9 86.40 1.6 37.0 85.3 \n",
919 | "887 TSE 9944 新麗 2016-10-04 13:30 24.2 24.2 24.50 -0.3 110.0 24.5 \n",
920 | "888 TSE 9945 潤泰新 2016-10-04 14:30 37.7 37.7 37.75 -1.1 7229.0 38.8 \n",
921 | "889 TSE 9955 佳龍 2016-10-04 13:30 17.6 17.6 17.65 0.0 73.0 17.6 \n",
922 | "\n",
923 | " 開盤 最高 最低 類股別_ID 類股_名稱 \n",
924 | "885 71.3 71.4 71.2 6.0 其他 \n",
925 | "886 85.8 86.9 85.2 6.0 其他 \n",
926 | "887 24.5 24.6 24.2 6.0 其他 \n",
927 | "888 38.6 38.6 37.6 6.0 其他 \n",
928 | "889 17.8 17.9 17.4 6.0 其他 "
929 | ]
930 | },
931 | "execution_count": 16,
932 | "metadata": {},
933 | "output_type": "execute_result"
934 | }
935 | ],
936 | "source": [
937 | "mdf = df.merge(df_類股, left_on = '股票代號', right_on = '個股_代號', how = 'left') # merge\n",
938 | "mdf = mdf.drop(['市場別_ID', '個股_代號', '個股_名稱'], axis = 1) # drop 多餘的欄位\n",
939 | "mdf.tail()"
940 | ]
941 | },
942 | {
943 | "cell_type": "markdown",
944 | "metadata": {},
945 | "source": [
946 | "## GroupBy"
947 | ]
948 | },
949 | {
950 | "cell_type": "code",
951 | "execution_count": 17,
952 | "metadata": {
953 | "collapsed": false
954 | },
955 | "outputs": [
956 | {
957 | "data": {
958 | "text/plain": [
959 | "類股_名稱\n",
960 | "光電 69\n",
961 | "其他 46\n",
962 | "其它電子 32\n",
963 | "化工 25\n",
964 | "半導體 64\n",
965 | "塑膠 22\n",
966 | "憑證 7\n",
967 | "橡膠 10\n",
968 | "水泥 7\n",
969 | "汽車 6\n",
970 | "油電燃氣 8\n",
971 | "營建 48\n",
972 | "玻璃 4\n",
973 | "生技醫療 20\n",
974 | "紡織 46\n",
975 | "航運運輸 21\n",
976 | "觀光 13\n",
977 | "貿易百貨 11\n",
978 | "資訊服務 13\n",
979 | "通信網路 39\n",
980 | "造紙 7\n",
981 | "金融 33\n",
982 | "鋼鐵 30\n",
983 | "電器電纜 15\n",
984 | "電子通路 23\n",
985 | "電子零組件 81\n",
986 | "電機 43\n",
987 | "電腦週邊 60\n",
988 | "食品 21\n",
989 | "dtype: int64"
990 | ]
991 | },
992 | "execution_count": 17,
993 | "metadata": {},
994 | "output_type": "execute_result"
995 | }
996 | ],
997 | "source": [
998 | "# 各類股有多少支個股\n",
999 | "mdf.groupby(['類股_名稱']).size().sort_index()"
1000 | ]
1001 | },
1002 | {
1003 | "cell_type": "code",
1004 | "execution_count": 18,
1005 | "metadata": {
1006 | "collapsed": false
1007 | },
1008 | "outputs": [
1009 | {
1010 | "data": {
1011 | "text/plain": [
1012 | "類股_名稱\n",
1013 | "光電 75.766667\n",
1014 | "其他 56.844130\n",
1015 | "其它電子 42.225000\n",
1016 | "化工 27.510400\n",
1017 | "半導體 50.935156\n",
1018 | "塑膠 28.105909\n",
1019 | "憑證 3.641429\n",
1020 | "橡膠 34.415000\n",
1021 | "水泥 19.055714\n",
1022 | "汽車 128.300000\n",
1023 | "油電燃氣 38.787500\n",
1024 | "營建 18.451250\n",
1025 | "玻璃 10.752500\n",
1026 | "生技醫療 54.405000\n",
1027 | "紡織 27.713696\n",
1028 | "航運運輸 15.499524\n",
1029 | "觀光 69.121538\n",
1030 | "貿易百貨 40.754545\n",
1031 | "資訊服務 35.570000\n",
1032 | "通信網路 38.973846\n",
1033 | "造紙 14.381429\n",
1034 | "金融 15.520606\n",
1035 | "鋼鐵 16.662333\n",
1036 | "電器電纜 11.322000\n",
1037 | "電子通路 34.553478\n",
1038 | "電子零組件 38.924321\n",
1039 | "電機 67.288837\n",
1040 | "電腦週邊 48.827833\n",
1041 | "食品 39.721905\n",
1042 | "Name: 成交, dtype: float64"
1043 | ]
1044 | },
1045 | "execution_count": 18,
1046 | "metadata": {},
1047 | "output_type": "execute_result"
1048 | }
1049 | ],
1050 | "source": [
1051 | "# 各類股 平均股價\n",
1052 | "mdf.groupby(['類股_名稱'])['成交'].mean().sort_index()"
1053 | ]
1054 | }
1055 | ],
1056 | "metadata": {
1057 | "anaconda-cloud": {},
1058 | "kernelspec": {
1059 | "display_name": "Python [default]",
1060 | "language": "python",
1061 | "name": "python3"
1062 | },
1063 | "language_info": {
1064 | "codemirror_mode": {
1065 | "name": "ipython",
1066 | "version": 3
1067 | },
1068 | "file_extension": ".py",
1069 | "mimetype": "text/x-python",
1070 | "name": "python",
1071 | "nbconvert_exporter": "python",
1072 | "pygments_lexer": "ipython3",
1073 | "version": "3.5.1"
1074 | }
1075 | },
1076 | "nbformat": 4,
1077 | "nbformat_minor": 0
1078 | }
1079 |
--------------------------------------------------------------------------------