├── _config.yml
├── code_snippets
    ├── python
    │   ├── move_files
    │   │   ├── dir1
    │   │   │   └── example.txt
    │   │   ├── dir2
    │   │   │   └── .gitkeep
    │   │   └── move_files.py
    │   ├── pathlib_write
    │   │   ├── greeting
    │   │   └── path_test.py
    │   ├── path_parents
    │   │   ├── test1
    │   │   │   └── test2
    │   │   │   │   └── test.txt
    │   │   └── path_parents.py
    │   ├── pathlib_iterate_files_end_with
    │   │   ├── data
    │   │   │   ├── data1.csv
    │   │   │   ├── data2.csv
    │   │   │   └── data3.csv
    │   │   └── main.py
    │   ├── pathlib_path
    │   │   ├── data
    │   │   │   └── processed
    │   │   │   │   ├── data1.csv
    │   │   │   │   └── data2.csv
    │   │   └── main.py
    │   ├── main_example
    │   │   └── salulation
    │   │   │   └── __main__.py
    │   ├── warnings_example.py
    │   ├── any_example.py
    │   ├── underscore_large_number.py
    │   ├── dates_in_month_as_feature.py
    │   ├── extended_iterable_unpacking.py
    │   ├── ignore_variables.py
    │   ├── underscore_for_loop.py
    │   ├── itertools_islice.py
    │   ├── random_choice.py
    │   ├── random_sample.py
    │   ├── fractions_example.py
    │   ├── join_list.py
    │   ├── unpack_iterables.py
    │   ├── check_if_number.py
    │   ├── division_operators.py
    │   ├── key_in_max.py
    │   ├── boolean_operators.py
    │   ├── list_extend.py
    │   ├── list_map.py
    │   ├── zip_example.py
    │   ├── eval_example.py
    │   ├── print_error.py
    │   ├── set_intersection.py
    │   ├── assert_customize_message.py
    │   ├── kwargs.py
    │   ├── collections_defaultdict.py
    │   ├── os_system.py
    │   ├── partial_function.py
    │   ├── copy_method.py
    │   ├── re_sub_example.py
    │   ├── zip_function.py
    │   ├── pass_statement.py
    │   ├── set_difference.py
    │   ├── slice_example.py
    │   ├── filter_example.py
    │   ├── return_multiple_values_with_dictionary.py
    │   ├── collections_ordereddict.py
    │   ├── __call__example.py
    │   ├── enumerate_example.py
    │   ├── multiprocessing_example.py
    │   ├── string_find.py
    │   ├── namedtuple_example.py
    │   ├── compare_execution_time.py
    │   ├── defaultdict_example.py
    │   ├── __str__and__repr.py
    │   ├── datetime_timedelta.py
    │   ├── improve_json_readability.py
    │   ├── itertools_combinations_example.py
    │   ├── list_comprehension.py
    │   ├── staticmethod_example.py
    │   ├── multiples_of_a_number.py
    │   ├── collections_counter.py
    │   ├── getattr_example.py
    │   ├── args_example.py
    │   ├── abc_example.py
    │   ├── built_in_functions_speed.py
    │   ├── classmethod_example.py
    │   ├── argparse_example.py
    │   ├── decorator_example.py
    │   ├── itertools_examples.py
    │   ├── property_decorator.py
    │   ├── heapq_example.py
    │   └── singledispatch_example.py
    ├── terminal
    │   ├── check_if_library_is_installed.sh
    │   ├── sed_command
    │   │   ├── weather.txt
    │   │   └── sed_command.sh
    │   ├── conda_rollback.sh
    │   ├── environment_variables
    │   │   ├── main.py
    │   │   └── .bash_profile
    │   ├── download_github_file.sh
    │   ├── gh_cli.sh
    │   ├── virtualenv_clone.sh
    │   ├── tee_example.sh
    │   ├── timeit_command_line.sh
    │   ├── make_file_immutable.sh
    │   ├── bash_for_loop.sh
    │   ├── create_symbolic_link.sh
    │   └── tr_command.sh
    ├── cool_tools
    │   ├── notify_send.sh
    │   ├── rich_inspect.py
    │   ├── pydash_chunk.py
    │   ├── install_forked.sh
    │   ├── typer_example.py
    │   ├── tqdm_set_description.py
    │   ├── ngrok_example.sh
    │   ├── box_example.py
    │   ├── strip_interactive_example.py
    │   ├── snsscrape_example.sh
    │   ├── heart_rate.py
    │   ├── mypy_example.py
    │   ├── yarl_example.py
    │   ├── knockknock_example.py
    │   ├── schedule_example.py
    │   ├── rich_console.py
    │   ├── icecream_example.py
    │   ├── loguru_example.py
    │   ├── icecream_datetime.py
    │   ├── pygithub_example.py
    │   ├── pyfiglet_example.py
    │   ├── getme_forecast.sh
    │   ├── decorator_module.py
    │   ├── notion_example.py
    │   ├── isort_example.py
    │   └── interrogate_example.py
    ├── data_science_tools
    │   ├── dotenv_example
    │   │   ├── .env
    │   │   └── example.py
    │   ├── pytest_repeat.py
    │   ├── hydra_example
    │   │   ├── config.yml
    │   │   └── main.py
    │   ├── fetch_openml_example.py
    │   ├── difflib_example.py
    │   ├── sklearn_rmse.py
    │   ├── gdown_example.py
    │   ├── distfit_example.py
    │   ├── extract_various_data.py
    │   ├── faker_example.py
    │   ├── maya_example.py
    │   ├── pytest_benchmark_example.py
    │   ├── snoop_example.py
    │   ├── streamlit_app.py
    │   ├── extract_holidays.py
    │   ├── convert_number_to_words.py
    │   ├── tqdm_example.py
    │   ├── geopy_example.py
    │   ├── wordfreq_example.py
    │   ├── dtreeviz_example.py
    │   ├── pytest_example.py
    │   ├── dvc_example.sh
    │   ├── pytest_parametrize.py
    │   ├── getname_example.py
    │   ├── graphviz_example.py
    │   ├── pytest_fixture.py
    │   ├── datefinder_example.py
    │   ├── fastai_cont_cat_split.py
    │   ├── texthero_examples.py
    │   ├── add_datepart_example.py
    │   ├── autoscraper_example.py
    │   ├── docopt_example.py
    │   ├── datapane_example.py
    │   ├── textblob_example.py
    │   ├── causalimpact_example.py
    │   ├── swifter_example.py
    │   ├── pipeline_gridsearchcv.py
    │   ├── spacy_ngram.py
    │   ├── pandera_example.py
    │   ├── kedro_example.py
    │   ├── fastai_df_shrink.py
    │   ├── compare_2_datasets
    │   │   └── compare_datasets.ipynb
    │   ├── mito_example
    │   │   ├── mito_example.ipynb
    │   │   └── iris.csv
    │   ├── newspaper3k.ipynb
    │   └── folium_example.ipynb
    ├── numpy
    │   ├── np_argmax.py
    │   ├── np_ravel.py
    │   ├── double_np_argsort.py
    │   ├── np_where.py
    │   ├── use_list_to_change_position_of_arrays.py
    │   ├── np_comparison.py
    │   ├── np_array_all.py
    │   ├── any_all.py
    │   ├── np_linspace.py
    │   ├── numpy_testing_almost_equal.py
    │   └── array_to_latex_example.ipynb
    ├── pandas
    │   ├── parse_dates.py
    │   ├── df_explode.py
    │   ├── pd_dataframe_iterrows.py
    │   ├── pd_Series_map.py
    │   ├── pd_series_between.py
    │   ├── df_columns_str_start_with.py
    │   ├── pd_dataframe_agg.py
    │   ├── pd_series_dt.py
    │   ├── pd_groupby_sample.py
    │   ├── df_agg.py
    │   ├── pct_change.py
    │   ├── exclude_outliers.py
    │   ├── df_assign.py
    │   ├── pd_cut.py
    │   ├── pd_reindex.py
    │   ├── select_columns_start_with.py
    │   ├── df_fillna.py
    │   ├── s.is_in.py
    │   ├── pd_series_str.py
    │   ├── df_pivot.py
    │   ├── set_categories.py
    │   ├── assert_frame_equal.py
    │   ├── df_diff.py
    │   ├── df_datetime_comparison.py
    │   ├── get_data_in_year_range.py
    │   ├── df_merge.py
    │   ├── df_rolling.py
    │   ├── reduce_memory.py
    │   ├── dataframe_pipe.py
    │   ├── select_dtypes.py
    │   └── highlight_pandas.ipynb
    └── notebook
    │   ├── display_math_equations.ipynb
    │   └── watermark_example.ipynb
├── images
    ├── cool.png
    ├── numpy.png
    ├── panda.png
    ├── python.png
    ├── notebook.png
    ├── command-window.png
    └── data-science.png
├── .gitignore
├── .pre-commit-config.yaml
└── README.md


/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-merlot


--------------------------------------------------------------------------------
/code_snippets/python/move_files/dir1/example.txt:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/code_snippets/python/move_files/dir2/.gitkeep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/code_snippets/python/pathlib_write/greeting:
--------------------------------------------------------------------------------
1 | Hello!


--------------------------------------------------------------------------------
/code_snippets/python/path_parents/test1/test2/test.txt:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/code_snippets/python/pathlib_iterate_files_end_with/data/data1.csv:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/code_snippets/python/pathlib_iterate_files_end_with/data/data2.csv:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/code_snippets/python/pathlib_iterate_files_end_with/data/data3.csv:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/code_snippets/python/pathlib_path/data/processed/data1.csv:
--------------------------------------------------------------------------------
1 | col1,col2
2 | 1,2


--------------------------------------------------------------------------------
/code_snippets/python/pathlib_path/data/processed/data2.csv:
--------------------------------------------------------------------------------
1 | col1,col2
2 | 1,2


--------------------------------------------------------------------------------
/code_snippets/terminal/check_if_library_is_installed.sh:
--------------------------------------------------------------------------------
1 | python -c 'import pandas'


--------------------------------------------------------------------------------
/code_snippets/python/main_example/salulation/__main__.py:
--------------------------------------------------------------------------------
1 | print("Hello")
2 | print("bye")


--------------------------------------------------------------------------------
/code_snippets/python/warnings_example.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | warnings.filterwarnings('ignore')


--------------------------------------------------------------------------------
/code_snippets/terminal/sed_command/weather.txt:
--------------------------------------------------------------------------------
1 | Today is a sunny day. I want to be outside.


--------------------------------------------------------------------------------
/code_snippets/cool_tools/notify_send.sh:
--------------------------------------------------------------------------------
1 | python file_to_run.py ; notify-send "Process terminated"


--------------------------------------------------------------------------------
/code_snippets/python/any_example.py:
--------------------------------------------------------------------------------
1 | text = 'abcdE'
2 | print(any(c for c in text if c.isupper()))


--------------------------------------------------------------------------------
/code_snippets/terminal/conda_rollback.sh:
--------------------------------------------------------------------------------
1 | # Show the revision history of the active conda environment
2 | conda list --revisions
3 | # Roll the environment back to revision N (pick N from the history printed by the previous command)
4 | conda install --revision N


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/dotenv_example/.env:
--------------------------------------------------------------------------------
1 | USERNAME=my_user_name
2 | PASSWORD=secret_password


--------------------------------------------------------------------------------
/code_snippets/python/underscore_large_number.py:
--------------------------------------------------------------------------------
1 | large_num = 1_000_000
2 | print(large_num)
3 | # 1000000


--------------------------------------------------------------------------------
/code_snippets/cool_tools/rich_inspect.py:
--------------------------------------------------------------------------------
1 | from rich import inspect
2 | 
3 | print(inspect('hello', methods=True))


--------------------------------------------------------------------------------
/images/cool.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/khuyentran1401/Python-data-science-code-snippet/HEAD/images/cool.png


--------------------------------------------------------------------------------
/code_snippets/python/move_files/move_files.py:
--------------------------------------------------------------------------------
1 | import shutil
2 | 
3 | shutil.move('dir1/example.txt', 'dir2')
4 | 
5 | 


--------------------------------------------------------------------------------
/images/numpy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/khuyentran1401/Python-data-science-code-snippet/HEAD/images/numpy.png


--------------------------------------------------------------------------------
/images/panda.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/khuyentran1401/Python-data-science-code-snippet/HEAD/images/panda.png


--------------------------------------------------------------------------------
/images/python.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/khuyentran1401/Python-data-science-code-snippet/HEAD/images/python.png


--------------------------------------------------------------------------------
/code_snippets/numpy/np_argmax.py:
--------------------------------------------------------------------------------
1 | import numpy as np 
2 | a = np.array([0.2, 0.4, 0.7, 0.3])
3 | print(np.argmax(a))
4 | # 2


--------------------------------------------------------------------------------
/code_snippets/python/dates_in_month_as_feature.py:
--------------------------------------------------------------------------------
1 | import calendar 
2 | 
3 | print(calendar.monthrange(2020, 11)[1])
4 | # 30


--------------------------------------------------------------------------------
/code_snippets/terminal/environment_variables/main.py:
--------------------------------------------------------------------------------
1 | import os 
2 | 
3 | os.getenv("SECRET_ACCESS_KEY")
4 | # yourkeyhere


--------------------------------------------------------------------------------
/images/notebook.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/khuyentran1401/Python-data-science-code-snippet/HEAD/images/notebook.png


--------------------------------------------------------------------------------
/code_snippets/python/extended_iterable_unpacking.py:
--------------------------------------------------------------------------------
1 | a, *_, b = [1, 2, 3, 4]
2 | print(a) # 1
3 | print(b) # 4
4 | print(_) # [2, 3]


--------------------------------------------------------------------------------
/code_snippets/python/ignore_variables.py:
--------------------------------------------------------------------------------
1 | def return_two():
2 |     return 1, 2
3 | 
4 | _, var = return_two()
5 | print(var)
6 | # 2


--------------------------------------------------------------------------------
/images/command-window.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/khuyentran1401/Python-data-science-code-snippet/HEAD/images/command-window.png


--------------------------------------------------------------------------------
/images/data-science.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/khuyentran1401/Python-data-science-code-snippet/HEAD/images/data-science.png


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .ipynb_checkpoints
2 | __pycache__
3 | workera.py 
4 | venv 
5 | create_snippet.sh
6 | .pytest_cache
7 | .vscode
8 | .config


--------------------------------------------------------------------------------
/code_snippets/pandas/parse_dates.py:
--------------------------------------------------------------------------------
1 | import pandas as pd 
2 | 
3 | df = pd.read_csv('data1.csv', parse_dates=['date_column_1', 'date_column_2'])


--------------------------------------------------------------------------------
/code_snippets/cool_tools/pydash_chunk.py:
--------------------------------------------------------------------------------
1 | from pydash import py_
2 | 
3 | a = [1, 2, 3, 4, 5]
4 | print(py_.chunk(a, 2))  # [[1, 2], [3, 4], [5]]
5 | 


--------------------------------------------------------------------------------
/code_snippets/terminal/environment_variables/.bash_profile:
--------------------------------------------------------------------------------
1 | # Save this in ~/.bash_profile
2 | 
3 | export SECRET_ACCESS_KEY=yourkeyhere
4 | 
5 | 


--------------------------------------------------------------------------------
/code_snippets/terminal/download_github_file.sh:
--------------------------------------------------------------------------------
1 | wget https://raw.githubusercontent.com/khuyentran1401/Data-science/master/visualization/dropdown/population.csv
2 | 


--------------------------------------------------------------------------------
/code_snippets/terminal/gh_cli.sh:
--------------------------------------------------------------------------------
1 | cd your_local_folder
2 | 
3 | # Create an empty local git repo
4 | git init
5 | 
6 | # Create a new GitHub repo
7 | gh repo create


--------------------------------------------------------------------------------
/code_snippets/terminal/virtualenv_clone.sh:
--------------------------------------------------------------------------------
1 | pip install virtualenv-clone
2 | virtualenv-clone old_venv/ new_venv/
3 | 
4 | source new_venv/bin/activate
5 | pip list


--------------------------------------------------------------------------------
/code_snippets/pandas/df_explode.py:
--------------------------------------------------------------------------------
1 | import pandas as pd 
2 | 
3 | df = pd.DataFrame({'a': [[1, 2], [4, 5]], 'b': [11, 13]})
4 | print(df)
5 | 
6 | print(df.explode('a'))


--------------------------------------------------------------------------------
/code_snippets/python/underscore_for_loop.py:
--------------------------------------------------------------------------------
 1 | for _ in range(5):
 2 |     print('Hello')
 3 | 
 4 | """ 
 5 | Hello
 6 | Hello
 7 | Hello
 8 | Hello
 9 | Hello
10 | """


--------------------------------------------------------------------------------
/code_snippets/numpy/np_ravel.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | arr = np.array([[1, 2], [3, 41]])
3 | print(arr)
4 | 
5 | print(np.ravel(arr))
6 | 
7 | print(np.ravel(arr, order="F"))


--------------------------------------------------------------------------------
/code_snippets/python/itertools_islice.py:
--------------------------------------------------------------------------------
1 | from itertools import islice
2 | 
3 | a = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
4 | new_a = list(islice(a, 1, 7, 2))
5 | print(new_a)
6 | 


--------------------------------------------------------------------------------
/code_snippets/python/random_choice.py:
--------------------------------------------------------------------------------
1 | import random 
2 | 
3 | to_do_tonight = ['stay at home', 'attend party', 'do exercise']
4 | 
5 | print(random.choice(to_do_tonight))


--------------------------------------------------------------------------------
/code_snippets/python/random_sample.py:
--------------------------------------------------------------------------------
1 | import random
2 | 
3 | random.seed(1)
4 | nums = [1, 2, 3, 4, 5]
5 | random_nums = random.sample(nums, 2)
6 | print(random_nums)
7 | 


--------------------------------------------------------------------------------
/code_snippets/terminal/tee_example.sh:
--------------------------------------------------------------------------------
1 | # command | tee filename.txt
2 | nvidia-smi | tee system_information.txt # write
3 | uname -a | tee -a system_information.txt # append


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/pytest_repeat.py:
--------------------------------------------------------------------------------
1 | # pip install pytest-repeat
2 | import pytest 
3 | 
4 | @pytest.mark.repeat(100)
5 | def test_instance_generator():
6 |     pass


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/hydra_example/config.yml:
--------------------------------------------------------------------------------
1 | data: data1 
2 | variables: 
3 |   drop_features: ['iid', 'id', 'idg', 'wave']
4 |   categorical_vars: ['undergra', 'zipcode']


--------------------------------------------------------------------------------
/code_snippets/python/fractions_example.py:
--------------------------------------------------------------------------------
1 | from fractions import Fraction
2 | 
3 | print(2 / 3 + 1)
4 | # 1.6666666666666665
5 | 
6 | print(Fraction(2 / 3 + 1).limit_denominator())
7 | # 5/3


--------------------------------------------------------------------------------
/code_snippets/cool_tools/install_forked.sh:
--------------------------------------------------------------------------------
1 | # pip install -e git+https://github.com/username/package.git#egg=package 
2 | pip install -e git+https://github.com/khuyentran1401/numpy.git#egg=numpy 


--------------------------------------------------------------------------------
/code_snippets/numpy/double_np_argsort.py:
--------------------------------------------------------------------------------
1 | import numpy as np 
2 | 
3 | a = np.array([2, 1, 4, 7, 3])
4 | 
5 | # Get rank of values in an array
6 | print(a.argsort().argsort())
7 | # [1 0 3 4 2]


--------------------------------------------------------------------------------
/code_snippets/python/join_list.py:
--------------------------------------------------------------------------------
1 | fruits = ['apples', 'oranges', 'grapes']
2 | 
3 | fruits_str = ', '.join(fruits)
4 | 
5 | print(f"Today, I need to get some {fruits_str} in the grocery store")


--------------------------------------------------------------------------------
/code_snippets/python/pathlib_write/path_test.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path 
2 | 
3 | file = Path('data')
4 | file.open('w').write('Hello!')
5 | 
6 | new_p = file.rename(Path("greeting"))
7 | 
8 | 


--------------------------------------------------------------------------------
/code_snippets/python/unpack_iterables.py:
--------------------------------------------------------------------------------
1 | nested_arr = [[1,2,3], ['a','b'], 4]
2 | num_arr, char_arr, num = nested_arr
3 | 
4 | print(num_arr)
5 | # [1, 2, 3]
6 | 
7 | print(char_arr)
8 | # ['a', 'b']


--------------------------------------------------------------------------------
/code_snippets/python/check_if_number.py:
--------------------------------------------------------------------------------
 1 | from numbers import Number 
 2 | 
 3 | a = 2
 4 | b = 0.4 
 5 | 
 6 | print(isinstance(a, Number))
 7 | # True
 8 | 
 9 | print(isinstance(b, Number))
10 | # True


--------------------------------------------------------------------------------
/code_snippets/python/division_operators.py:
--------------------------------------------------------------------------------
1 | # Get a division
2 | print(5 / 2)  # 2.5
3 | 
4 | # Get remainder of a division
5 | print(5 % 2)  # 1
6 | 
7 | # Get floor division
8 | print(5 // 2)  # 2
9 | 


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/dotenv_example/example.py:
--------------------------------------------------------------------------------
1 | from dotenv import load_dotenv
2 | import os 
3 | 
4 | load_dotenv()
5 | PASSWORD = os.getenv('PASSWORD')
6 | print(PASSWORD)
7 | # secret_password


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/fetch_openml_example.py:
--------------------------------------------------------------------------------
1 | from sklearn.datasets import fetch_openml
2 | 
3 | monk = fetch_openml(name='monks-problems-2', as_frame=True)
4 | print(monk['data'].head(10))
5 | 


--------------------------------------------------------------------------------
/code_snippets/pandas/pd_dataframe_iterrows.py:
--------------------------------------------------------------------------------
1 | import pandas as pd 
2 | 
3 | df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
4 | for idx, row in df.iterrows():
5 |     print(f"a: {row['a']}, b: {row['b']}")


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/difflib_example.py:
--------------------------------------------------------------------------------
1 | from difflib import SequenceMatcher
2 | 
3 | text1 = 'I am Khuyen'
4 | text2 = 'I am Khuen'
5 | print(SequenceMatcher(a=text1, b=text2).ratio())
6 | 0.9523809523809523


--------------------------------------------------------------------------------
/code_snippets/pandas/pd_Series_map.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | 
 3 | s = pd.Series(["a", "b", "c"])
 4 | 
 5 | print(s.map({"a": 1, "b": 2, "c": 3}))
 6 | """
 7 | 0    1
 8 | 1    2
 9 | 2    3
10 | dtype: int64
11 | """
12 | 


--------------------------------------------------------------------------------
/code_snippets/python/key_in_max.py:
--------------------------------------------------------------------------------
1 | birth_year = {"Ben": 1997, "Alex": 2000, "Oliver": 1995}
2 | 
3 | print(max(birth_year))
4 | # Oliver
5 | 
6 | max_val = max(birth_year, key=lambda k: birth_year[k])
7 | print(max_val)
8 | # Alex


--------------------------------------------------------------------------------
/code_snippets/python/pathlib_path/main.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path 
2 | import pandas as pd
3 | 
4 | DATA_DIR = Path('data', 'processed')
5 | 
6 | df1 = pd.read_csv(DATA_DIR / 'data1.csv')
7 | df2 = pd.read_csv(DATA_DIR / 'data2.csv')


--------------------------------------------------------------------------------
/code_snippets/pandas/pd_series_between.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd 
 2 | 
 3 | s = pd.Series([5, 2, 15, 13, 6, 10])
 4 | 
 5 | print(s[s.between(0, 10)])
 6 | """ 
 7 | 0     5
 8 | 1     2
 9 | 4     6
10 | 5    10
11 | dtype: int64
12 | """


--------------------------------------------------------------------------------
/code_snippets/terminal/timeit_command_line.sh:
--------------------------------------------------------------------------------
1 | python -m timeit "arr = []
2 | for i in range(100):
3 |     arr.append(i)"
4 | 
5 | python -m timeit "arr = [i for i in range(100)]"
6 | 
7 | python -m timeit "arr = list(range(100))"
8 | 
9 | 


--------------------------------------------------------------------------------
/code_snippets/python/boolean_operators.py:
--------------------------------------------------------------------------------
 1 | movie_available = True
 2 | have_money = False
 3 | 
 4 | get_excited = movie_available | have_money
 5 | print(get_excited)
 6 | 
 7 | 
 8 | buy = movie_available & have_money
 9 | print(buy)
10 | 


--------------------------------------------------------------------------------
/code_snippets/terminal/make_file_immutable.sh:
--------------------------------------------------------------------------------
1 | touch important_file.txt 
2 | 
3 | # Make the file immutable
4 | sudo chattr +i important_file.txt 
5 | 
6 | # Check the file attributes
7 | lsattr important_file.txt 
8 | 
9 | rm important_file.txt


--------------------------------------------------------------------------------
/code_snippets/cool_tools/typer_example.py:
--------------------------------------------------------------------------------
1 | import typer 
2 | 
3 | def process_data(data: str, version: int):
4 |     print(f'Processing {data},' 
5 |           f'version {version}')
6 | 
7 | if __name__ == '__main__':
8 |     typer.run(process_data)


--------------------------------------------------------------------------------
/code_snippets/python/list_extend.py:
--------------------------------------------------------------------------------
 1 | # Add a list to a list
 2 | a = [1, 2, 3, 4]
 3 | a.append([5, 6])
 4 | print(a)  # [1, 2, 3, 4, [5, 6]]
 5 | 
 6 | 
 7 | a = [1, 2, 3, 4]
 8 | a.extend([5, 6])
 9 | 
10 | print(a)  # [1, 2, 3, 4, 5, 6]
11 | 


--------------------------------------------------------------------------------
/code_snippets/python/list_map.py:
--------------------------------------------------------------------------------
 1 | nums = [1, 2, 3]
 2 | print(list(map(str, nums)))  # ['1', '2', '3']
 3 | 
 4 | 
 5 | def multiply_by_two(num: float):
 6 |     return num * 2
 7 | 
 8 | 
 9 | print(list(map(multiply_by_two, nums)))  # [2, 4, 6]
10 | 


--------------------------------------------------------------------------------
/code_snippets/python/zip_example.py:
--------------------------------------------------------------------------------
 1 | nums = [1, 2, 3, 4]
 2 | string = "abcd"
 3 | combinations = list(zip(nums, string))
 4 | for comb in combinations:
 5 |     print(comb)
 6 | """ 
 7 | (1, 'a')
 8 | (2, 'b')
 9 | (3, 'c')
10 | (4, 'd')
11 | """
12 | 


--------------------------------------------------------------------------------
/code_snippets/terminal/bash_for_loop.sh:
--------------------------------------------------------------------------------
 1 | datas=(1 3 5)
 2 | # Quote the array expansion so each element stays a single word
 3 | for data in "${datas[@]}"
 4 | do
 5 | echo "Processing data $data"
 6 | # python process.py data="$data"
 7 | done
 8 | 
 9 | # Processing data 1
10 | # Processing data 3
11 | # Processing data 5


--------------------------------------------------------------------------------
/code_snippets/cool_tools/tqdm_set_description.py:
--------------------------------------------------------------------------------
1 | from time import sleep
2 | 
3 | from tqdm import tqdm
4 | 
5 | fruits = tqdm(["apple", "orange", "grape"])
6 | for fruit in fruits:
7 |     sleep(0.3)
8 |     fruits.set_description(f"Picking {fruit}")
9 | 


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/sklearn_rmse.py:
--------------------------------------------------------------------------------
1 | from sklearn.metrics import mean_squared_error
2 | 
3 | y_actual = [1, 2, 3]
4 | y_predicted = [1.5, 2.5, 3.5]
5 | rmse = mean_squared_error(y_actual, y_predicted, squared=False)
6 | print(rmse)  # 0.5
7 | 


--------------------------------------------------------------------------------
/code_snippets/python/eval_example.py:
--------------------------------------------------------------------------------
 1 | variable_1 = 'a'
 2 | variable_2 = 'b'
 3 | variable_3 = 'c'
 4 | 
 5 | print(eval('variable_1'))
 6 | # a
 7 | 
 8 | variables = [eval(f'variable_{i}') for i in range(1, 4)]
 9 | print(variables)
10 | # ['a', 'b', 'c']


--------------------------------------------------------------------------------
/code_snippets/python/print_error.py:
--------------------------------------------------------------------------------
 1 | arr = {'a': [1, 2], 'b': 1}
 2 | for key, val in arr.items():
 3 |     try:
 4 |         print(val[0])
 5 |     except Exception as e:
 6 |         print(e)
 7 | """ 
 8 | 1
 9 | 'int' object is not subscriptable
10 | """


--------------------------------------------------------------------------------
/code_snippets/python/path_parents/path_parents.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path 
2 | 
3 | file = 'test1/test2/test.txt'
4 | print(Path(file).parents[0])
5 | print(Path(file).parents[1])
6 | print(Path(file).parents[2])
7 | print(Path(file).parents[2].resolve())
8 | 


--------------------------------------------------------------------------------
/code_snippets/python/set_intersection.py:
--------------------------------------------------------------------------------
1 | requirement1 = ['pandas', 'numpy', 'statsmodel']
2 | requirement2 = ['numpy', 'statsmodel', 'sympy', 'matplotlib']
3 | 
4 | intersection = set.intersection(set(requirement1), set(requirement2))
5 | print(list(intersection))


--------------------------------------------------------------------------------
/code_snippets/python/assert_customize_message.py:
--------------------------------------------------------------------------------
1 | def division(num1: int, num2: int):
2 |     assert num2 != 0, "“num2 must be different from 0"
3 |     return num1 / num2
4 | 
5 | 
6 | division(2, 0)
7 | """ 
8 | AssertionError: “num2 must be different from 0
9 | """


--------------------------------------------------------------------------------
/code_snippets/numpy/np_where.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | arr = np.array([[1, 4, 10, 15], [2, 3, 8, 9]])
 4 | 
 5 | # Multiply values that are less than 5 by 2
 6 | print(np.where(arr < 5, arr * 2, arr))
 7 | """
 8 | [[ 2  8 10 15]
 9 |  [ 4  6  8  9]]
10 | """
11 | 


--------------------------------------------------------------------------------
/code_snippets/python/kwargs.py:
--------------------------------------------------------------------------------
 1 | parameters = {'a': 1, 'b': 2}
 2 | def example(c, **kwargs):
 3 |     print(kwargs)
 4 |     for val in kwargs.values():
 5 |         print(c + val)
 6 | 
 7 | example(c=3, **parameters)
 8 | """ 
 9 | {'a': 1, 'b': 2}
10 | 4
11 | 5
12 | """


--------------------------------------------------------------------------------
/code_snippets/numpy/use_list_to_change_position_of_arrays.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | arr = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
4 | print("Old array\n", arr)
5 | 
6 | new_row_position = [1, 2, 0]
7 | new_arr = arr[new_row_position, : ]
8 | print("New array\n", new_arr)
9 | 


--------------------------------------------------------------------------------
/code_snippets/python/collections_defaultdict.py:
--------------------------------------------------------------------------------
 1 | from collections import defaultdict
 2 | 
 3 | classes = defaultdict(lambda: 'Outside')
 4 | classes['Math'] = 'B23'
 5 | classes['Physics'] = 'D24'
 6 | print(classes['Math'])
 7 | # B23
 8 | 
 9 | print(classes['English'])
10 | # Outside


--------------------------------------------------------------------------------
/code_snippets/python/os_system.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | os.system("echo Files in the current directory are:")
 4 | os.system("ls")
 5 | 
 6 | """
 7 | Files in the current directory are:
 8 | cool_tools  create_snippet.sh  data_science_tools  numpy  pandas  python  terminal
 9 | """
10 | 


--------------------------------------------------------------------------------
/code_snippets/python/partial_function.py:
--------------------------------------------------------------------------------
 1 | from functools import partial
 2 | 
 3 | 
 4 | def linear_func(x, a, b):
 5 |     return a * x + b
 6 | 
 7 | 
 8 | linear_func_partial = partial(linear_func, a=2, b=3)
 9 | print(linear_func_partial(2))
10 | print(linear_func_partial(4))
11 | 


--------------------------------------------------------------------------------
/code_snippets/cool_tools/ngrok_example.sh:
--------------------------------------------------------------------------------
 1 | # Generate config
 2 | 
 3 | jupyter notebook --generate-config
 4 | # Allow remote access
 5 | 
 6 | echo "c.NotebookApp.allow_remote_access = True" >> ~/.jupyter/jupyter_notebook_config.py
 7 | 
 8 | jupyter notebook
 9 | 
10 | ngrok http 8888
11 | 


--------------------------------------------------------------------------------
/code_snippets/python/copy_method.py:
--------------------------------------------------------------------------------
 1 | # Instead of this 
 2 | l1 = [1, 2, 3]
 3 | l2 = l1 
 4 | l2.append(4)
 5 | print(l2)
 6 | # [1, 2, 3, 4]
 7 | 
 8 | print(l1)
 9 | # [1, 2, 3, 4]
10 | 
11 | # Do this
12 | l1 = [1, 2, 3]
13 | l2 = l1.copy()
14 | l2.append(4)
15 | print(l1)
16 | # [1, 2, 3]


--------------------------------------------------------------------------------
/code_snippets/python/re_sub_example.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | 
 3 | text = 'Today is 3/7/2021'
 4 | match_pattern = r'(\d+)/(\d+)/(\d+)'
 5 | 
 6 | print(re.sub(match_pattern, 'Sunday', text))
 7 | # Today is Sunday
 8 | 
 9 | print(re.sub(match_pattern, r'\3-\1-\2', text))
10 | # Today is 2021-3-7


--------------------------------------------------------------------------------
/code_snippets/cool_tools/box_example.py:
--------------------------------------------------------------------------------
 1 | from box import Box
 2 | 
 3 | food_box = Box({"food": {"fruit": {"name": "apple", "flavor": "sweet"}}})
 4 | 
 5 | print(food_box)
 6 | # {'food': {'fruit': {'name': 'apple', 'flavor': 'sweet'}}}
 7 | 
 8 | print(food_box.food.fruit.name)
 9 | # apple
10 | 


--------------------------------------------------------------------------------
/code_snippets/pandas/df_columns_str_start_with.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | 
3 | df = pd.DataFrame({'pricel': [1, 2, 3],
4 |                     'price2': [2, 3, 4],
5 |                     'year': [2020, 2021, 2021]})
6 | 
7 | mask = df.columns.str.startswith('price')
8 | print(df.loc[:, mask])
9 | 


--------------------------------------------------------------------------------
/code_snippets/cool_tools/strip_interactive_example.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from strip_interactive import run_interactive
 3 | 
 4 | code = """
 5 | >>> import numpy as np
 6 | >>> print(np.array([1,2,3]))
 7 | [1 2 3]
 8 | >>> print(np.array([4,5,6]))
 9 | [4 5 6]
10 | """
11 | 
12 | run_interactive(code)
13 | 
14 | 


--------------------------------------------------------------------------------
/code_snippets/pandas/pd_dataframe_agg.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from collections import Counter
 3 | import pandas as pd
 4 | 
 5 | 
 6 | def count_two(nums: list):
 7 |     return Counter(nums)[2]
 8 | 
 9 | 
10 | df = pd.DataFrame({"coll": [1, 3, 5], "col2": [2, 4, 6]})
11 | print(df.agg(["sum", count_two]))
12 | 


--------------------------------------------------------------------------------
/code_snippets/pandas/pd_series_dt.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | 
 3 | df = pd.DataFrame({'date': ['2021/05/13 15:00', '2022-6-20 14:00'],
 4 | 		   'values': [1, 3]})
 5 | 
 6 | df['date'] = pd.to_datetime(df['date'])
 7 | 
 8 | print(df['date'].dt.year)
 9 | 
10 | print(df['date'].dt.time)
11 | 
12 | 


--------------------------------------------------------------------------------
/code_snippets/cool_tools/snsscrape_example.sh:
--------------------------------------------------------------------------------
1 | # Scrape all tweets from @KhuyenTran16
2 | snscrape twitter-user KhuyenTran16
3 | 
4 | # Save outputs
5 | snscrape twitter-user KhuyenTran16 >> khuyen_tweets 
6 | 
7 | # Scrape 100 tweets with hashtag python
8 | snscrape --max-results 100 twitter-hashtag python


--------------------------------------------------------------------------------
/code_snippets/numpy/np_comparison.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | a = np.array([1, 2, 3])
 4 | b = np.array([4, 1, 2])
 5 | 
 6 | print(a < 2)
 7 | """
 8 | [ True False False]
 9 | """
10 | 
11 | print(a < b)
12 | """
13 | [ True False False]
14 | """
15 | 
16 | print(a[a < b])
17 | """
18 | [1]
19 | """


--------------------------------------------------------------------------------
/code_snippets/python/zip_function.py:
--------------------------------------------------------------------------------
 1 | nums = [1, 2, 3, 4]
 2 | chars = ['a', 'b', 'c', 'd']
 3 | 
 4 | comb = list(zip(nums, chars))
 5 | print(comb)
 6 | # [(1, 'a'), (2, 'b'), (3, 'c'), (4, 'd')]
 7 | 
 8 | nums_2, chars_2 = zip(*comb)
 9 | print(nums_2, chars_2)
10 | # (1, 2, 3, 4) ('a', 'b', 'c', 'd')


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/gdown_example.py:
--------------------------------------------------------------------------------
1 | # pip install gdown
2 | import gdown
3 | 
4 | # Format of url: https://drive.google.com/uc?id=YOURFILEID
5 | url = 'https://drive.google.com/uc?id=1jI1cmxqnwsmC-vbl8dNY6b4aNBtBbKy3'
6 | output = 'Twitter.zip'
7 | 
8 | gdown.download(url, output, quiet=False)
9 | 


--------------------------------------------------------------------------------
/code_snippets/python/pass_statement.py:
--------------------------------------------------------------------------------
 1 | def say_hello():
 2 |     pass 
 3 | 
 4 | def ask_to_sign_in():
 5 |     pass 
 6 | 
 7 | def main(is_user: bool):
 8 |     if is_user:
 9 |         say_hello()
10 |     else:
11 |         ask_to_sign_in()
12 | 
13 | if __name__ == '__main__':
14 |     main(is_user=True)


--------------------------------------------------------------------------------
/code_snippets/numpy/np_array_all.py:
--------------------------------------------------------------------------------
 1 | import numpy as np 
 2 | 
 3 | a = np.array([[1, 2, 1], [2, 2, 5]])
 4 | print(a)
 5 | 
 6 | # Find if all elements are less than 3 in each column
 7 | print((a < 3).all(axis=0))
 8 | 
 9 | # Find if all elements are less than 3 in each row
10 | print((a < 3).all(axis=1))
11 | 


--------------------------------------------------------------------------------
/code_snippets/python/set_difference.py:
--------------------------------------------------------------------------------
 1 | a = [1, 2, 3, 4]
 2 | b = [1, 3, 4, 5, 6]
 3 | 
 4 | # Find elements in a but not in b
 5 | diff = set(a).difference(set(b))
 6 | print(list(diff))  # [2]
 7 | 
 8 | # Find elements in b but not in a
 9 | diff = set(b).difference(set(a))
10 | print(list(diff))  # [5, 6]
11 | 


--------------------------------------------------------------------------------
/code_snippets/pandas/pd_groupby_sample.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | 
 3 | df = pd.DataFrame({"col1": ["a", "a", "b", "c", "c", "d"], "col2": [4, 5, 6, 7, 8, 9]})
 4 | print(df.groupby("col1").sample(n=1))
 5 | 
 6 | """
 7 |   col1  col2
 8 | 0    a     4
 9 | 2    b     6
10 | 4    c     8
11 | 5    d     9
12 | """
13 | 


--------------------------------------------------------------------------------
/code_snippets/terminal/create_symbolic_link.sh:
--------------------------------------------------------------------------------
 1 | information
 2 | # command not found: information
 3 | 
 4 | which htop
 5 | # /usr/bin/htop
 6 | 
 7 | # Create symbolic link between 2 files
 8 | # ln -s [EXISTING_FILE] [NEW_FILE]
 9 | sudo ln -s /usr/bin/htop /usr/bin/information
10 | 
11 | information
12 | # Works now!


--------------------------------------------------------------------------------
/code_snippets/pandas/df_agg.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | 
 3 | df = pd.DataFrame({"a": [1, 2, 3, 4], "b": [2, 3, 4, 5]})
 4 | 
 5 | print(df.agg({"a": ["sum", "mean"], "b": ["min", "max"]}))
 6 | 
 7 | """
 8 |          a    b
 9 | sum   10.0  NaN
10 | mean   2.5  NaN
11 | min    NaN  2.0
12 | max    NaN  5.0
13 | """
14 | 


--------------------------------------------------------------------------------
/code_snippets/python/slice_example.py:
--------------------------------------------------------------------------------
 1 | data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
 2 | 
 3 | # Instead of this
 4 | some_sum = sum(data[:8]) * sum(data[8:])
 5 | 
 6 | # do this
 7 | JANUARY = slice(0, 8)
 8 | FEBRUARY = slice(8, len(data))
 9 | some_sum = sum(data[JANUARY] * sum(data[FEBRUARY]))
10 | print(some_sum) 
11 | # 684


--------------------------------------------------------------------------------
/code_snippets/terminal/tr_command.sh:
--------------------------------------------------------------------------------
1 | echo $PATH
2 | # /home/user/Python-data-science-code-snippet/venv/bin:/home/khuyentran/anaconda3/bin:/home/user/.poetry/bin
3 | 
4 | echo $PATH | tr ":" "\n"
5 | """
6 | /home/user/Python-data-science-code-snippet/venv/bin
7 | /home/khuyentran/anaconda3/bin
8 | /home/user/.poetry/bin
9 | """


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/distfit_example.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from distfit import distfit
 3 | 
 4 | X = np.random.normal(0, 3, 1000)
 5 | 
 6 | # Initialize model
 7 | dist = distfit()
 8 | 
 9 | # Find best theoretical distribution for empirical data X
10 | distribution = dist.fit_transform(X)
11 | dist.plot()
12 | 


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/extract_various_data.py:
--------------------------------------------------------------------------------
1 | import os 
2 | from datetime import datetime 
3 | import pandas_datareader.data as web 
4 | 
5 | df = web.DataReader('AD', 'av-daily', start=datetime(2008, 1, 1),
6 |                     end = datetime(2018, 2, 28),
7 |                     api_key=os.getenv('ALPHAVANTAGE_API_KEY'))


--------------------------------------------------------------------------------
/code_snippets/python/filter_example.py:
--------------------------------------------------------------------------------
 1 | def get_fruit(val: str):
 2 |     fruits = ['apple', 'orange', 'grape']
 3 |     if val in fruits:
 4 |         return True 
 5 |     else:
 6 |         return False 
 7 | 
 8 | items = ['chair', 'apple', 'water', 'table', 'orange']
 9 | fruits = filter(get_fruit, items)
10 | print(list(fruits))


--------------------------------------------------------------------------------
/code_snippets/cool_tools/heart_rate.py:
--------------------------------------------------------------------------------
 1 | import heartrate 
 2 | heartrate.trace(browser=True)
 3 | 
 4 | def factorial(x):
 5 |     if x == 1:
 6 |         return 1
 7 |     else:
 8 |         return (x * factorial(x-1))
 9 | 
10 | 
11 | if __name__ == "__main__":
12 |     num = 5
13 |     print(f"The factorial of {num} is {factorial(num)}")


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/faker_example.py:
--------------------------------------------------------------------------------
 1 | # pip install faker
 2 | from faker import Faker
 3 | 
 4 | fake = Faker()
 5 | 
 6 | print(fake.color_name())
 7 | 
 8 | print(fake.name())
 9 | 
10 | print(fake.address())
11 | 
12 | print(fake.date_of_birth(minimum_age=22))
13 | 
14 | print(fake.city())
15 | 
16 | print(fake.job())
17 | 


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/maya_example.py:
--------------------------------------------------------------------------------
1 | import maya
2 | 
3 | # Automatically parse datetime string
4 | string = '2016-12-16 18:23:45.423992+00:00'
5 | print(maya.parse(string).datetime())
6 | # 2016-12-16 18:23:45.423992+00:00
7 | 
8 | print(maya.parse(string).datetime(to_timezone='US/Central'))
9 | # 2016-12-16 12:23:45.423992-06:00


--------------------------------------------------------------------------------
/code_snippets/python/return_multiple_values_with_dictionary.py:
--------------------------------------------------------------------------------
 1 | def return_many_values():
 2 |     a = 1
 3 |     b = 2
 4 |     c = 3 
 5 |     d = 4
 6 |     # Instead of return a, b, c, d
 7 |     return {'a': a, 'b': b, 'c': c, 'd': d}
 8 | 
 9 | values = return_many_values()
10 | print(values['a'])
11 | # 1
12 | print(values['b'])
13 | # 2


--------------------------------------------------------------------------------
/code_snippets/pandas/pct_change.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd 
 2 | 
 3 | df = pd.DataFrame({'a': [20, 35, 10], 'b': [1, 2, 3]})
 4 | print(df)
 5 | """
 6 |     a  b
 7 | 0  20  1
 8 | 1  35  2
 9 | 2  10  3
10 | """
11 | 
12 | print(df.a.pct_change())
13 | """ 
14 | 0         NaN
15 | 1    0.750000
16 | 2   -0.714286
17 | Name: a, dtype: float64
18 | """


--------------------------------------------------------------------------------
/code_snippets/python/collections_ordereddict.py:
--------------------------------------------------------------------------------
1 | import collections
2 | 
3 | tasks = collections.OrderedDict(laundry=0.5, shopping=2, clean=2)
4 | tasks['movie'] = 2
5 | print(tasks)
6 | # OrderedDict([('laundry', 0.5), ('shopping', 2), ('clean', 2), ('movie', 2)])
7 | 
8 | print(tasks.keys())
9 | # odict_keys(['laundry', 'shopping', 'clean', 'movie'])


--------------------------------------------------------------------------------
/code_snippets/python/pathlib_iterate_files_end_with/main.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path 
 2 | 
 3 | directory_name = 'data'
 4 | 
 5 | # Loop files in a directory
 6 | pathlist = Path(directory_name).rglob('*.csv')
 7 | for path in pathlist:
 8 |     print(str(path))
 9 | 
10 | """ 
11 | data/data3.csv
12 | data/data1.csv
13 | data/data2.csv
14 | """


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 | -   repo: https://github.com/psf/black  # current home of black (formerly ambv/black)
 3 |     rev: 20.8b1
 4 |     hooks:
 5 |     -   id: black
 6 | -   repo: https://github.com/PyCQA/flake8  # flake8 moved from GitLab to GitHub
 7 |     rev: 3.8.4
 8 |     hooks:
 9 |     -   id: flake8
10 | -   repo: https://github.com/PyCQA/isort  # current home of isort (formerly timothycrosley/isort)
11 |     rev: 5.7.0
12 |     hooks:
13 |     -   id: isort
14 | 


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/pytest_benchmark_example.py:
--------------------------------------------------------------------------------
 1 | def list_comprehension(len_list=5):
 2 |     return [i for i in range(len_list)]
 3 | 
 4 | 
 5 | def test_concat(benchmark):
 6 |     res = benchmark(list_comprehension)
 7 |     assert res == [0, 1, 2, 3, 4]
 8 | 
 9 | 
10 | """On your terminal
11 | pytest pytest_benchmark_example.py
12 | """
13 | 


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/snoop_example.py:
--------------------------------------------------------------------------------
 1 | # pip install snoop
 2 | import snoop 
 3 | 
 4 | @snoop
 5 | def factorial(x: int):
 6 |     if x == 1:
 7 |         return 1
 8 |     else: 
 9 |         return (x * factorial(x-1))
10 | 
11 | if __name__ == '__main__':
12 |     num = 2
13 |     print(f'The factorial of {num} is {factorial(num)}')
14 | 


--------------------------------------------------------------------------------
/code_snippets/cool_tools/mypy_example.py:
--------------------------------------------------------------------------------
 1 | from typing import List, Union
 2 | 
 3 | def get_name_price(fruits: list) -> Union[list, tuple]:
 4 |     return zip(*fruits)
 5 | 
 6 | fruits = [('apple', 2), ('orange', 3), ('grape', 2)]
 7 | names, prices = get_name_price(fruits)
 8 | print(names)  # ('apple', 'orange', 'grape')
 9 | print(prices)  # (2, 3, 2)
10 | 


--------------------------------------------------------------------------------
/code_snippets/numpy/any_all.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | a = np.array([[1, 2, 1], [2, 2, 5]])
 4 | 
 5 | # Get the rows whose values are all less than 3
 6 | mask_all = (a<3).all(axis=1)
 7 | print(a[mask_all])
 8 | """
 9 | [[1 2 1]]
10 | """
11 | 
12 | mask_any = (a<3).any(axis=1)
13 | print(a[mask_any])
14 | """ 
15 | [[1 2 1]
16 |  [2 2 5]]
17 | """


--------------------------------------------------------------------------------
/code_snippets/numpy/np_linspace.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | import numpy as np
 3 | 
 4 | x = np.linspace(2, 4, num=10)
 5 | print(x)
 6 | """
 7 | [2.         2.22222222 2.44444444 2.66666667 2.88888889 3.11111111
 8 |  3.33333333 3.55555556 3.77777778 4.        ]
 9 | """
10 | 
11 | y = np.arange(10)
12 | 
13 | plt.plot(x, y)
14 | plt.show()
15 | 


--------------------------------------------------------------------------------
/code_snippets/cool_tools/yarl_example.py:
--------------------------------------------------------------------------------
 1 | from yarl import URL 
 2 | 
 3 | url = URL('https://github.com')
 4 | new_url = url/ "search" % 'q=data+science'
 5 | print(new_url) # https://github.com/search?q=data+science
 6 | 
 7 | print(new_url.host) # github.com
 8 | print(new_url.path) # /search
 9 | print(new_url.query_string) # q=data science
10 | 
11 | 
12 | 


--------------------------------------------------------------------------------
/code_snippets/pandas/exclude_outliers.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | 
 3 | data = {"col0": [9, -3, 0, -1, 5]}
 4 | df = pd.DataFrame(data)
 5 | 
 6 | lower = df.col0.quantile(0.05)
 7 | upper = df.col0.quantile(0.95)
 8 | 
 9 | print(df.clip(lower=lower, upper=upper))
10 | """ 
11 |    col0
12 | 0   8.2
13 | 1  -2.6
14 | 2   0.0
15 | 3  -1.0
16 | 4   5.0
17 | """
18 | 


--------------------------------------------------------------------------------
/code_snippets/python/__call__example.py:
--------------------------------------------------------------------------------
 1 | class DataLoader:
 2 | 
 3 | 	def __init__(self, data_dir: str):
 4 | 		self.data_dir = data_dir
 5 | 		print("Instance is created")
 6 | 
 7 | 	def __call__(self):
 8 | 		print("Instance is called")
 9 | 
10 | data_loader = DataLoader('my_data_dir')
11 | # Instance is created
12 | 
13 | data_loader()
14 | # Instance is called


--------------------------------------------------------------------------------
/code_snippets/python/enumerate_example.py:
--------------------------------------------------------------------------------
 1 | arr = ['a', 'b', 'c', 'd', 'e']
 2 | 
 3 | # Instead of this
 4 | for i in range(len(arr)):
 5 |     print(i, arr[i])
 6 | """ 
 7 | 0 a
 8 | 1 b
 9 | 2 c
10 | 3 d
11 | 4 e
12 | """
13 | 
14 | # Use this
15 | for i, val in enumerate(arr):
16 |     print(i, val)
17 | """ 
18 | 0 a
19 | 1 b
20 | 2 c
21 | 3 d
22 | 4 e
23 | """


--------------------------------------------------------------------------------
/code_snippets/python/multiprocessing_example.py:
--------------------------------------------------------------------------------
 1 | from joblib import Parallel, delayed
 2 | import multiprocessing
 3 | 
 4 | def add_three(num: int):
 5 |     return num + 3
 6 | 
 7 | num_cores = multiprocessing.cpu_count()
 8 | results = Parallel(n_jobs=num_cores)(delayed(add_three)(i) for i in range(10))
 9 | print(results)
10 | # [3, 4, 5, 6, 7, 8, 9, 10, 11, 12]


--------------------------------------------------------------------------------
/code_snippets/python/string_find.py:
--------------------------------------------------------------------------------
 1 | sentence = "Today is Saturaday"
 2 | 
 3 | # Find the index of first occurrence of the substring
 4 | print(sentence.find("day") )
 5 | # 2
 6 | 
 7 | # Start searching for the substring at index 3
 8 | print(sentence.find("day", 3))
 9 | # 17
10 | 
11 | print(sentence.find("nice"))
12 | #-1 
13 | # No substring is found
14 | 


--------------------------------------------------------------------------------
/code_snippets/cool_tools/knockknock_example.py:
--------------------------------------------------------------------------------
1 | from knockknock import email_sender 
2 | 
3 | @email_sender(recipient_emails=['<your_email@address.com>', '<your_second_email@adress.com>'],
4 | sender_email="<grandma's_email@gmail.com>")
5 | def train_your_nicest_model(your_nicest_parameters):
6 |     import time 
7 |     time.sleep(10_000)
8 |     return {'loss': 0.9}
9 | 


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/streamlit_app.py:
--------------------------------------------------------------------------------
 1 | # pip install spacy-streamlit
 2 | # python -m spacy download en_core_web_sm
 3 | 
 4 | 
 5 | import spacy_streamlit
 6 | 
 7 | models = ['en_core_web_sm']
 8 | text = "Today is a beautiful day"
 9 | spacy_streamlit.visualize(models, text)
10 | 
11 | """On your terminal, type:
12 | streamlit run streamlit_app.py 
13 | """


--------------------------------------------------------------------------------
/code_snippets/pandas/df_assign.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd 
 2 | 
 3 | df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
 4 | 
 5 | df = (df.assign(col3=lambda x: x.col1 * 100 + x.col2)
 6 |     .assign(col4=lambda x: x.col2 * x.col3)
 7 |     )
 8 | print(df)
 9 | """ 
10 |    col1  col2  col3  col4
11 | 0     1     3   103   309
12 | 1     2     4   204   816
13 | """


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/extract_holidays.py:
--------------------------------------------------------------------------------
 1 | # pip install holidays
 2 | from datetime import date 
 3 | import holidays
 4 | 
 5 | us_holidays = holidays.UnitedStates()
 6 | 
 7 | print('2014-07-04' in us_holidays)
 8 | # True
 9 | 
10 | print(us_holidays.get('2014-7-4'))
11 | # Independence Day
12 | 
13 | print(us_holidays.get('2014/7/4'))
14 | # Independence Day
15 | 


--------------------------------------------------------------------------------
/code_snippets/pandas/pd_cut.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd 
 2 | 
 3 | df = pd.DataFrame({'a': [1, 3, 7, 11, 14, 17]})
 4 | 
 5 | bins = [0, 5, 10, 15, 20]
 6 | df['binned'] = pd.cut(df['a'], bins=bins)
 7 | 
 8 | print(df)
 9 | """
10 |     a    binned
11 | 0   1    (0, 5]
12 | 1   3    (0, 5]
13 | 2   7   (5, 10]
14 | 3  11  (10, 15]
15 | 4  14  (10, 15]
16 | 5  17  (15, 20]
17 | """


--------------------------------------------------------------------------------
/code_snippets/terminal/sed_command/sed_command.sh:
--------------------------------------------------------------------------------
 1 | cat weather.txt 
 2 | # Today is a sunny day. I want to be outside.
 3 | 
 4 | sed 's/sunny/rainy/' weather.txt 
 5 | # Today is a rainy day. I want to be outside.
 6 | 
 7 | sed -i 's/sunny/rainy/' weather.txt 
 8 | sed -i 's/outside/inside/' weather.txt 
 9 | 
10 | cat weather.txt
11 | # Today is a rainy day. I want to be inside.


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/convert_number_to_words.py:
--------------------------------------------------------------------------------
 1 | # pip install num2words
 2 | from num2words import num2words
 3 | 
 4 | print(num2words(105))
 5 | # one hundred and five
 6 | 
 7 | print(num2words(105, to='ordinal'))
 8 | # one hundred and fifth
 9 | 
10 | print(num2words(105, lang='vi'))
11 | # một trăm lẻ năm
12 | 
13 | print(num2words(105, lang='es'))
14 | # ciento cinco


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/hydra_example/main.py:
--------------------------------------------------------------------------------
 1 | import hydra 
 2 | 
 3 | @hydra.main(config_name='config.yml')
 4 | def main(config):
 5 |     print(f'Process {config.data}')
 6 |     print(f'Drop features: {config.variables.drop_features}')
 7 | 
 8 | if __name__ == '__main__':
 9 |     main()
10 | 
11 | """ 
12 | Process data1
13 | Drop features: ['iid', 'id', 'idg', 'wave']
14 | """


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/tqdm_example.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd 
 2 | from tqdm import tqdm 
 3 | import time 
 4 | 
 5 | df = pd.DataFrame({'a': [1, 2, 3, 4, 5], 'b': [2, 3, 4, 5, 6]})
 6 | 
 7 | tqdm.pandas()
 8 | def func(row):
 9 |     time.sleep(1)
10 |     return row + 1
11 | 
12 | df['a'].progress_apply(func)
13 | """
14 | 80%|██████████████████████████▍      | 4/5 [00:03<00:00,  1.22it/s]
15 | """


--------------------------------------------------------------------------------
/code_snippets/python/namedtuple_example.py:
--------------------------------------------------------------------------------
 1 | from collections import namedtuple
 2 | 
 3 | Person = namedtuple("Person", "name gender")
 4 | 
 5 | oliver = Person("Oliver", "male")
 6 | khuyen = Person("Khuyen", "female")
 7 | 
 8 | print(oliver)
 9 | # Person(name='Oliver', gender='male')
10 | 
11 | print(khuyen)
12 | # Person(name='Khuyen', gender='female')
13 | 
14 | print(oliver.name)
15 | # Oliver
16 | 


--------------------------------------------------------------------------------
/code_snippets/cool_tools/schedule_example.py:
--------------------------------------------------------------------------------
 1 | import schedule 
 2 | import time 
 3 | 
 4 | def get_incoming_data():
 5 |     print("Get incoming data")
 6 | 
 7 | def train_model():
 8 |     print("Retraining model")
 9 | 
10 | schedule.every().day.at("10:30").do(get_incoming_data)
11 | schedule.every().wednesday.at("08:00").do(train_model)
12 | 
13 | while True:
14 |     schedule.run_pending()
15 |     time.sleep(1)


--------------------------------------------------------------------------------
/code_snippets/pandas/pd_reindex.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd 
 2 | 
 3 | s = pd.Series([1, 2, 3], index=['2021-07-20', '2021-07-23', '2021-07-25'])
 4 | s.index = pd.to_datetime(s.index)
 5 | print(s)
 6 | 
 7 | # Get dates ranging from 2021/7/20 to 2021/7/25
 8 | new_index = pd.date_range('2021-07-20', '2021-07-25')
 9 | 
10 | # Conform Series to new index
11 | new_s = s.reindex(new_index, fill_value=0)
12 | print(new_s)


--------------------------------------------------------------------------------
/code_snippets/pandas/select_columns_start_with.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | 
 3 | df = pd.DataFrame({"col1": [1, 2, 3], "col2": [3, 4, 5], "year": [2019, 2019, 2020]})
 4 | 
 5 | chosen_cols = df.columns.str.startswith('col')
 6 | print(chosen_cols)
 7 | """[ True  True False]"""
 8 | 
 9 | print(df.loc[:, chosen_cols])
10 | """ 
11 |    col1  col2
12 | 0     1     3
13 | 1     2     4
14 | 2     3     5
15 | """
16 | 


--------------------------------------------------------------------------------
/code_snippets/python/compare_execution_time.py:
--------------------------------------------------------------------------------
 1 | import time 
 2 | import timeit 
 3 | 
 4 | def func():
 5 |     """comprehension"""
 6 |     l = [i for i in range(10_000)]
 7 | 
 8 | def func2():
 9 |     """list range"""
10 |     l = list(range(10_000))
11 | 
12 | expSize = 1000
13 | time1 = timeit.timeit(func, number=expSize)
14 | time2 = timeit.timeit(func2, number=expSize)
15 | 
16 | print(time1/time2)
17 | # 1.738841810509789


--------------------------------------------------------------------------------
/code_snippets/python/defaultdict_example.py:
--------------------------------------------------------------------------------
 1 | from collections import defaultdict
 2 | 
 3 | # Instead of this
 4 | food_price = {'apple': [], 'orange': []}
 5 | 
 6 | # Use this
 7 | food_price = defaultdict(list)
 8 | 
 9 | for i in range(1, 4):
10 |     food_price['apple'].append(i)
11 |     food_price['orange'].append(i)    
12 | 
13 | print(food_price.items()) 
14 | # dict_items([('apple', [1, 2, 3]), ('orange', [1, 2, 3])])
15 | 


--------------------------------------------------------------------------------
/code_snippets/pandas/df_fillna.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | 
 4 | df = pd.DataFrame({"a": [1, np.nan, 3], "b": [4, 5, np.nan], "c": [1, 2, 3]})
 5 | print(df)
 6 | """
 7 |      a    b  c
 8 | 0  1.0  4.0  1
 9 | 1  NaN  5.0  2
10 | 2  3.0  NaN  3
11 | """
12 | 
13 | df = df.fillna(method="ffill")
14 | print(df)
15 | """
16 |      a    b  c
17 | 0  1.0  4.0  1
18 | 1  1.0  5.0  2
19 | 2  3.0  5.0  3
20 | """
21 | 


--------------------------------------------------------------------------------
/code_snippets/pandas/s.is_in.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd 
 2 | 
 3 | df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
 4 | print(df)
 5 | """ 
 6 |    a  b
 7 | 0  1  4
 8 | 1  2  5
 9 | 2  3  6
10 | """
11 | 
12 | l = [1, 2, 6, 7]
13 | print(df.a.isin(l))
14 | """ 
15 | 0     True
16 | 1     True
17 | 2    False
18 | Name: a, dtype: bool
19 | """
20 | 
21 | df = df[df.a.isin(l)]
22 | print(df)
23 | """ 
24 |    a  b
25 | 0  1  4
26 | 1  2  5
27 | """


--------------------------------------------------------------------------------
/code_snippets/python/__str__and__repr.py:
--------------------------------------------------------------------------------
 1 | class Food:
 2 |     def __init__(self, name: str, color: str):
 3 |         self.name = name 
 4 |         self.color = color
 5 | 
 6 |     def __str__(self):
 7 |         return f'{self.color} {self.name}'
 8 | 
 9 |     def __repr__(self):
10 |         return f'Food({self.color}, {self.name})'
11 | 
12 | food = Food('apple', 'red')
13 | 
14 | print(food) #  str__
15 | 
16 | print(repr(food)) #__repr__
17 | 


--------------------------------------------------------------------------------
/code_snippets/python/datetime_timedelta.py:
--------------------------------------------------------------------------------
 1 | from datetime import date, datetime, timedelta
 2 | 
 3 | beginning = '2020/01/03 23:59:00'
 4 | duration_in_minutes = 2500
 5 | 
 6 | # Find the beginning time
 7 | beginning = datetime.strptime(beginning, '%Y/%m/%d %H:%M:%S')
 8 | 
 9 | # Find duration in days
10 | days = timedelta(minutes=duration_in_minutes)
11 | 
12 | # Find end time
13 | end = beginning + days 
14 | print(end)
15 | # 2020-01-05 17:39:00


--------------------------------------------------------------------------------
/code_snippets/python/improve_json_readability.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | pet = dict(
 3 | 	kind="dog",
 4 | 	name= "Bim Bim",
 5 | 	age=7,
 6 | 	favorite_food='yogurt'
 7 | )
 8 | print(json.dumps(pet))
 9 | # {"kind": "dog", "name": "Bim Bim", "age": 7, "favorite_food": "yogurt"}
10 | 
11 | print(json.dumps(pet, indent=4))
12 | """ 
13 | {
14 |     "kind": "dog",
15 |     "name": "Bim Bim",
16 |     "age": 7,
17 |     "favorite_food": "yogurt"
18 | }
19 | """


--------------------------------------------------------------------------------
/code_snippets/cool_tools/rich_console.py:
--------------------------------------------------------------------------------
 1 | # pip install rich
 2 | from rich import console
 3 | from rich.console import Console 
 4 | import pandas as pd 
 5 | 
 6 | console = Console()
 7 | 
 8 | data = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
 9 | 
10 | def edit_data(data):
11 |     var_1 = 45
12 |     var_2 = 30
13 |     var_3 = var_1 + var_2
14 |     data['a'] = [var_1, var_2, var_3]
15 |     console.log(data, log_locals=True)
16 | 
17 | edit_data(data)


--------------------------------------------------------------------------------
/code_snippets/python/itertools_combinations_example.py:
--------------------------------------------------------------------------------
 1 | from itertools import combinations
 2 | num_list = [1, 2, 3]
 3 | for i in num_list: # instead of this
 4 |     for j in num_list:
 5 |         if i != j:
 6 |             print(i, j)
 7 | """ 
 8 | 1 2
 9 | 1 3
10 | 2 1
11 | 2 3
12 | 3 1
13 | 3 2
14 | """
15 | comb = combinations(num_list, 2) # use this
16 | for pair in list(comb):
17 |     print(pair)
18 | """ 
19 | (1, 2)
20 | (1, 3)
21 | (2, 3)
22 | """


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/geopy_example.py:
--------------------------------------------------------------------------------
 1 | # pip install geopy
 2 | 
 3 | from geopy.geocoders import Nominatim
 4 | geolocator = Nominatim(user_agent='find_location')
 5 | location = geolocator.geocode('30 North Circle Drive, Edwardsville, IL')
 6 | 
 7 | print(location.address)
 8 | # 30, Circle Drive, Edwardsville, Madison County, Illinois, 62025, United States
 9 | 
10 | print(location.latitude, location.longitude)
11 | # 38.80371599362934 -89.93842706888563


--------------------------------------------------------------------------------
/code_snippets/python/list_comprehension.py:
--------------------------------------------------------------------------------
 1 | from timeit import timeit
 2 | 
 3 | def for_loop():
 4 |     result = []
 5 |     for i in range(1_000_000):
 6 |         result.append(i)
 7 |     return result
 8 | 
 9 | def list_comprehesion():
10 |     return [i for i in range(1_000_000)]
11 | 
12 | expSize = 1000
13 | time1 = timeit(for_loop, number=expSize)
14 | time2 = timeit(list_comprehesion, number=expSize)
15 | 
16 | print(time1/time2)
17 | # 1.4560360180596434
18 | 
19 | 


--------------------------------------------------------------------------------
/code_snippets/python/staticmethod_example.py:
--------------------------------------------------------------------------------
 1 | import re 
 2 | 
 3 | class ProcessText:
 4 |     def __init__(self, text_column: str):
 5 |         self.text_column = text_column
 6 | 
 7 |     @staticmethod
 8 |     def remove_URL(sample: str) -> str:
 9 |         """Replace url with empty space"""
10 |         return re.sub(r'http\S+', "", sample)
11 | 
12 | text = ProcessText.remove_URL('My favorite page is https://www.google.com')
13 | print(text)
14 | # My favorite page is 


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/wordfreq_example.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | import seaborn as sns
 3 | from wordfreq import word_frequency
 4 | 
 5 | print(word_frequency("eat", "en"))  # 0.000135
 6 | print(word_frequency("the", "en"))  # 0.0537
 7 | 
 8 | sentence = "There is a dog running in a park"
 9 | words = sentence.split(" ")
10 | word_frequencies = [word_frequency(word, "en") for word in words]
11 | 
12 | sns.barplot(words, word_frequencies)
13 | plt.show()
14 | 


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/dtreeviz_example.py:
--------------------------------------------------------------------------------
 1 | from dtreeviz.trees import dtreeviz
 2 | from sklearn import tree
 3 | from sklearn.datasets import load_wine
 4 | 
 5 | wine = load_wine()
 6 | classifier = tree.DecisionTreeClassifier(max_depth=2)
 7 | classifier.fit(wine.data, wine.target)
 8 | 
 9 | vis = dtreeviz(
10 |     classifier,
11 |     wine.data,
12 |     wine.target,
13 |     target_name="wine_type",
14 |     feature_names=wine.feature_names,
15 | )
16 | 
17 | vis.view()
18 | 


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/pytest_example.py:
--------------------------------------------------------------------------------
 1 | from textblob import TextBlob
 2 | 
 3 | def extract_sentiment(text: str):
 4 |     '''Extract sentiment using textblob. 
 5 |     Polarity is within range [-1, 1]'''
 6 | 
 7 |     text = TextBlob(text)
 8 | 
 9 |     return text.sentiment.polarity
10 | 
11 | def test_extract_sentiment_negative():
12 | 
13 |     text = "I do not think this will turn out well"
14 | 
15 |     sentiment = extract_sentiment(text)
16 | 
17 |     assert sentiment < 0


--------------------------------------------------------------------------------
/code_snippets/pandas/pd_series_str.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | 
 3 | fruits = pd.Series(['Orange', 'Apple', 'Grape'])
 4 | print(fruits)
 5 | """
 6 | 0    Orange
 7 | 1     Apple
 8 | 2     Grape
 9 | dtype: object
10 | """
11 | 
12 | print(fruits.str.lower())
13 | """
14 | 0    orange
15 | 1     apple
16 | 2     grape
17 | dtype: object
18 | """
19 | 
20 | print(fruits.str.lower().str.replace("e", "a"))
21 | """
22 | 0    oranga
23 | 1     appla
24 | 2     grapa
25 | dtype: object
26 | """
27 | 
28 | 


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/dvc_example.sh:
--------------------------------------------------------------------------------
 1 | # Initialize DVC in the current repository
 2 | dvc init
 3 | 
 4 | # Track the data directory with DVC and commit the generated pointer file
 5 | dvc add data # Creates data.dvc
 6 | git add data.dvc
 7 | git commit -m "add data"
 8 | 
 9 | # Register a Google Drive location as the default (-d) remote named "remote"
10 | dvc remote add -d remote gdrive://lynNBbT-4J0ida0eKYQqZZbC93juUUUbVH
11 | 
12 | # Push the data to remote storage
13 | dvc push 
14 | 
15 | # Get the data from remote storage
16 | dvc pull 
17 | 
18 | # Switch to another version: restore the previous data.dvc, then sync the data to it
19 | git checkout HEAD^1 data.dvc
20 | dvc checkout
21 | 


--------------------------------------------------------------------------------
/code_snippets/python/multiples_of_a_number.py:
--------------------------------------------------------------------------------
 1 | def get_multiples_of_n(nums: list, n: int):
 2 | 	"""Select only numbers whose remainders
 3 | 	 are 0 when dividing them by n"""
 4 | 	return [num for num in nums if num % n == 0]
 5 | 
 6 | nums = [1, 4, 9, 12, 15, 16]
 7 | 
 8 | print(get_multiples_of_n(nums, 2)) # multiples of 2
 9 | # [4, 12, 16]
10 | 
11 | print(get_multiples_of_n(nums, 3)) # multiples of 3
12 | # [9, 12, 15]
13 | 
14 | print(get_multiples_of_n(nums, 4)) # multiples of 4
15 | # [4, 12, 16]
16 | 


--------------------------------------------------------------------------------
/code_snippets/numpy/numpy_testing_almost_equal.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from numpy.testing import assert_almost_equal, assert_array_equal
 3 | 
 4 | a = np.array([[1.222, 2.222], [3.222, 4.222]])
 5 | test = np.array([[1.221, 2.221], [3.221, 4.221]])
 6 | assert_almost_equal(a, test, decimal=2)
 7 | 
 8 | assert_array_equal(a, test)
 9 | """AssertionError: 
10 | Arrays are not equal
11 | 
12 | Mismatched elements: 4 / 4 (100%)
13 | Max absolute difference: 0.001
14 | Max relative difference: 0.000819
15 | """
16 | 


--------------------------------------------------------------------------------
/code_snippets/python/collections_counter.py:
--------------------------------------------------------------------------------
 1 | from collections import Counter
 2 | 
 3 | char_list = ['a', 'b', 'c', 'a', 'd', 'b', 'b']
 4 | 
 5 | # Instead of this
 6 | custom_counter = {}
 7 | for char in char_list:
 8 |     if char not in custom_counter:
 9 |         custom_counter[char] = 1
10 |     else:
11 |         custom_counter[char] += 1
12 | 
13 | print(custom_counter)
14 | # {'a': 2, 'b': 3, 'c': 1, 'd': 1}
15 | 
16 | # Use this
17 | print(Counter(char_list))
18 | # Counter({'b': 3, 'a': 2, 'c': 1, 'd': 1})


--------------------------------------------------------------------------------
/code_snippets/pandas/df_pivot.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | 
 3 | df = pd.DataFrame(
 4 |     {
 5 |         "item": ["apple", "apple", "apple", "apple", "apple"],
 6 |         "size": ["small", "small", "large", "large", "large"],
 7 |         "location": ["Walmart", "Aldi", "Walmart", "Aldi", "Aldi"],
 8 |         "price": [3, 2, 4, 3, 2.5],
 9 |     }
10 | )
11 | 
12 | print(df)
13 | 
14 | pivot = pd.pivot_table(
15 |     df, values="price", index=["item", "size"], columns=["location"], aggfunc="mean"
16 | )
17 | print(pivot)
18 | 


--------------------------------------------------------------------------------
/code_snippets/cool_tools/icecream_example.py:
--------------------------------------------------------------------------------
 1 | from icecream import ic
 2 | 
 3 | def plus_one(num):
 4 |     return num + 1
 5 | 
 6 | # Instead of this
 7 | print('output of plus_on with num = 1:', plus_one(1))
 8 | print('output of plus_on with num = 2:', plus_one(2))
 9 | 
10 | # Use this
11 | ic(plus_one(1))
12 | ic(plus_one(2))
13 | 
14 | # One your terminal
15 | """
16 | $ python icecream_example. py
17 | output of plus_on with num = 1: 2
18 | output of plus_on with num = 2: 3
19 | ic| plus_one(1): 2
20 | ic| plus_one(2): 3
21 | """


--------------------------------------------------------------------------------
/code_snippets/cool_tools/loguru_example.py:
--------------------------------------------------------------------------------
 1 | from sklearn.metrics import mean_squared_error
 2 | import numpy as np
 3 | from loguru import logger
 4 | 
 5 | logger.add("file_{time}.log", format="{time} {level} {message}")
 6 | 
 7 | @logger.catch
 8 | def evaluate_result(y_true: np.array, y_pred: np.array):
 9 |     mean_square_err = mean_squared_error(y_true, y_pred)
10 |     root_mean_square_err = mean_square_err ** 0.5
11 | 
12 | y_true = np.array([1, 2, 3])
13 | y_pred = np.array([1.5, 2.2])
14 | evaluate_result(y_true, y_pred)
15 | 


--------------------------------------------------------------------------------
/code_snippets/pandas/set_categories.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd 
 2 | 
 3 | df = pd.DataFrame({'col1': ['large', 'small', 'mini', 'medium', 'mini'],
 4 |                 'col2': [1, 2, 3, 4, 5]})
 5 | ordered_sizes = 'large', 'medium', 'small', 'mini'
 6 | 
 7 | df.col1 = df.col1.astype('category')
 8 | df.col1.cat.set_categories(ordered_sizes, ordered=True, inplace=True)
 9 | print(df.sort_values(by='col1'))
10 | """ 
11 |      col1  col2
12 | 0   large     1
13 | 3  medium     4
14 | 1   small     2
15 | 2    mini     3
16 | 4    mini     5
17 | """


--------------------------------------------------------------------------------
/code_snippets/python/getattr_example.py:
--------------------------------------------------------------------------------
 1 | class Food:
 2 |     def __init__(self, name: str, color: str):
 3 |         self.name = name 
 4 |         self.color = color 
 5 | 
 6 | apple = Food('apple', 'red')
 7 | 
 8 | print("The color of apple is", getattr(apple, 'color', 'yellow'))
 9 | # The color of apple is red
10 | 
11 | print("The flavor of apple is", getattr(apple, 'flavor', 'sweet'))
12 | # The flavor of apple is sweet
13 | 
14 | print("The flavor of apple is", apple.sweet)
15 | # AttributeError: 'Food' object has no attribute 'sweet'


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/pytest_parametrize.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | def text_contain_word(word: str, text: str):
 4 |     '''Find whether the text contains a particular word'''
 5 |     
 6 |     return word in text
 7 | 
 8 | test = [
 9 |     ('There is a duck in this text',True),
10 |     ('There is nothing here', False)
11 |     ]
12 | 
13 | @pytest.mark.parametrize('sample, expected', test)
14 | def test_text_contain_word(sample, expected):
15 | 
16 |     word = 'duck'
17 | 
18 |     assert text_contain_word(word, sample) == expected


--------------------------------------------------------------------------------
/code_snippets/pandas/assert_frame_equal.py:
--------------------------------------------------------------------------------
 1 | from pandas.testing import assert_frame_equal
 2 | import pandas as pd 
 3 | 
 4 | 
 5 | df1 = pd.DataFrame({'coll': [1,2,3], 'col2': [4,5,6]})
 6 | df2 = pd.DataFrame({'coll': [1,3,4], 'col2': [4,5,6]})
 7 | assert_frame_equal(df1, df2)
 8 | 
 9 | """
10 | AssertionError: DataFrame.iloc[:, 0] (column name="coll") are different
11 | 
12 | DataFrame.iloc[:, 0] (column name="coll") values are different (66.66667 %)
13 | [index]: [0, 1, 2]
14 | [left]:  [1, 2, 3]
15 | [right]: [1, 3, 4]
16 | """
17 | 
18 | 
19 | 


--------------------------------------------------------------------------------
/code_snippets/python/args_example.py:
--------------------------------------------------------------------------------
 1 | sample_range = (2, 5)
 2 | sample_range2 = (3, 7)
 3 | 
 4 | # With *
 5 | print(list(range(*sample_range)))
 6 | print(list(range(*sample_range2)))
 7 | """
 8 | [2, 3, 4]
 9 | [3, 4, 5, 6]
10 | """
11 | 
12 | # Without *
13 | print(list(range(sample_range)))
14 | """
15 | Traceback (most recent call last):
16 |   File "code_snippets/python/args_example.py", line 9, in <module>
17 |     print(list(range(sample_range)))
18 | TypeError: 'tuple' object cannot be interpreted as an integer
19 | """
20 | 
21 | 
22 | 


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/getname_example.py:
--------------------------------------------------------------------------------
 1 | # pip install dill
 2 | 
 3 | from sklearn.linear_model import LogisticRegression
 4 | from sklearn.neighbors import KNeighborsClassifier
 5 | from dill.source import getname
 6 | 
 7 | def save_model(model):
 8 |     model_name = getname(model)
 9 |     print(f"Saving model as model/{model_name}.pkl")
10 | 
11 | save_model(KNeighborsClassifier)
12 | save_model(LogisticRegression)
13 | 
14 | """Output
15 | Saving model as model/KNeighborsClassifier.pkl
16 | Saving model as model/LogisticRegression.pkl
17 | """


--------------------------------------------------------------------------------
/code_snippets/pandas/df_diff.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd 
 2 | df = pd.DataFrame({'a': [1, 2, 3, 4], 'b': [2, 3, 4, 6]})
 3 | diff = df.diff()
 4 | print(diff)
 5 | """ 
 6 |      a    b
 7 | 0  NaN  NaN
 8 | 1  1.0  1.0
 9 | 2  1.0  1.0
10 | 3  1.0  2.0
11 | """
12 | 
13 | shift = diff.shift(-1)
14 | print(shift)
15 | """ 
16 |      a    b
17 | 0  1.0  1.0
18 | 1  1.0  1.0
19 | 2  1.0  2.0
20 | 3  NaN  NaN
21 | """
22 | 
23 | processed_df = shift.dropna()
24 | print(processed_df)
25 | """ 
26 |      a    b
27 | 0  1.0  1.0
28 | 1  1.0  1.0
29 | 2  1.0  2.0
30 | """


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/graphviz_example.py:
--------------------------------------------------------------------------------
 1 | # pip install graphviz
 2 | from graphviz import Graph 
 3 | 
 4 | # Instantiate a new Graph object
 5 | dot = Graph('Data Science Process', format='png')
 6 | 
 7 | # Add nodes
 8 | dot.node('A', 'Get Data')
 9 | dot.node('B', 'Clean, Prepare, & Manipulate Data')
10 | dot.node('C', 'Train Model')
11 | dot.node('D', 'Test Data')
12 | dot.node('E', 'Improve')
13 | 
14 | # Connect these nodes
15 | dot.edges(['AB', 'BC', 'CD', 'DE'])
16 | 
17 | # Save chart
18 | dot.render('data_science_flowchart', view=True)
19 | 


--------------------------------------------------------------------------------
/code_snippets/python/abc_example.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod 
 2 | 
 3 | class Animal(ABC):
 4 | 
 5 |     def __init__(self, name: str):
 6 |         self.name = name 
 7 |         super().__init__()
 8 | 
 9 |     @abstractmethod 
10 |     def make_sound(self):
11 |         pass 
12 | 
13 | class Dog(Animal):
14 |     def make_sound(self):
15 |         print(f'{self.name} says: Woof')
16 | 
17 | class Cat(Animal):
18 |     def make_sound(self):
19 |         print(f'{self.name} says: Meows')
20 | 
21 | Dog('Pepper').make_sound()
22 | Cat('Bella').make_sound()


--------------------------------------------------------------------------------
/code_snippets/pandas/df_datetime_comparison.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | 
 3 | df = pd.DataFrame(
 4 |     {"date": pd.date_range(start="2021-7-19", end="2021-7-23"), "value": list(range(5))}
 5 | )
 6 | print(df)
 7 | """
 8 |         date  value
 9 | 0 2021-07-19      0
10 | 1 2021-07-20      1
11 | 2 2021-07-21      2
12 | 3 2021-07-22      3
13 | 4 2021-07-23      4
14 | """
15 | 
16 | filtered_df = df[df.date <= "2021-07-21"]
17 | print(filtered_df)
18 | """
19 |         date  value
20 | 0 2021-07-19      0
21 | 1 2021-07-20      1
22 | 2 2021-07-21      2
23 | """
24 | 


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/pytest_fixture.py:
--------------------------------------------------------------------------------
 1 | # pip install textblob
 2 | 
 3 | import pytest 
 4 | from textblob import TextBlob
 5 | 
 6 | def extract_sentiment(text: str):
 7 |     """Extract sentimetn using textblob. Polarity is within range [-1, 1]"""
 8 |     
 9 |     text = TextBlob(text)
10 |     return text.sentiment.polarity
11 | 
12 | @pytest.fixture 
13 | def example_data():
14 |     return 'Today I found a duck and I am happy'
15 | 
16 | def test_extract_sentiment(example_data):
17 |     sentiment = extract_sentiment(example_data)
18 |     assert sentiment > 0


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/datefinder_example.py:
--------------------------------------------------------------------------------
 1 | # pip install datefinder
 2 | 
 3 | from datefinder import find_dates
 4 | 
 5 | text = """"We have one meeting on May 17th,
 6 | 2021 at 9:00am and another meeting on 5/18/2021
 7 | at 10:00. I hope you can attend one of the
 8 | meetings."""
 9 | 
10 | matches = find_dates(text)
11 | 
12 | for match in matches:
13 |     print("Date and time:", match)
14 |     print("Only day:", match.day)
15 | 
16 | """Output:
17 | Date and time: 2021-05-17 09:00:00
18 | Only day: 17
19 | Date and time: 2021-05-18 10:00:00
20 | Only day: 18
21 | """


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/fastai_cont_cat_split.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | from fastai.tabular.core import cont_cat_split
 3 | 
 4 | df = pd.DataFrame(
 5 |     {
 6 |         "col1": [1, 2, 3, 4, 5],
 7 |         "col2": ["a", "b", "c", "d", "e"],
 8 |         "col3": [1.0, 2.0, 3.0, 4.0, 5.0],
 9 |     }
10 | )
11 | 
12 | cont_names, cat_names = cont_cat_split(df)
13 | print(cont_names) # ['col3']
14 | print(cat_names) # ['col1', 'col2']
15 | 
16 | cont_names, cat_names = cont_cat_split(df, max_card=3)
17 | print(cont_names) # ['col1', 'col3']
18 | print(cat_names) # ['col2']
19 | 


--------------------------------------------------------------------------------
/code_snippets/python/built_in_functions_speed.py:
--------------------------------------------------------------------------------
 1 | from timeit import timeit
 2 | from numpy.random import randint
 3 | 
 4 | def built_in_sum(l: list):
 5 |     return sum(l)
 6 | 
 7 | def custom_sum(l: list):
 8 |     sum_val = 0
 9 |     for num in l:
10 |         sum_val += num 
11 |     return sum_val
12 | 
13 | l = randint(0, 100, size=100_000)
14 | expSize = 100
15 | 
16 | built_in_time = timeit("built_in_sum(l)", number=expSize, globals=globals())
17 | custom_time = timeit("custom_sum(l)", number=expSize, globals=globals())
18 | print(custom_time/built_in_time)
19 | # 1.2499071011706575


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/texthero_examples.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | import texthero as hero
 4 | 
 5 | df = pd.DataFrame(
 6 |     {
 7 |         "text": [
 8 |             "Today is a    beautiful day",
 9 |             "There are 3 ducks in this pond",
10 |             "This is. very cool.",
11 |             np.nan,
12 |         ]
13 |     }
14 | )
15 | 
16 | print(df.text.pipe(hero.clean))
17 | """
18 | 0    today beautiful day
19 | 1             ducks pond
20 | 2                   cool
21 | 3                       
22 | Name: text, dtype: object
23 | """
24 | 


--------------------------------------------------------------------------------
/code_snippets/pandas/get_data_in_year_range.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd 
 2 | from datetime import datetime 
 3 | df = pd.DataFrame({'date': [datetime(2018, 10, 1),
 4 |                             datetime(2019, 10, 1),
 5 |                             datetime(2020, 10, 1)],
 6 |                     'val': [1, 2, 3]}).set_index('date')
 7 | 
 8 | print(df)
 9 | """ 
10 |             val
11 | date           
12 | 2018-10-01    1
13 | 2019-10-01    2
14 | 2020-10-01    3
15 | """
16 | print(df.loc['2019':])
17 | """ 
18 |             val
19 | date           
20 | 2019-10-01    2
21 | 2020-10-01    3
22 | """


--------------------------------------------------------------------------------
/code_snippets/cool_tools/icecream_datetime.py:
--------------------------------------------------------------------------------
 1 | # pip install icecream
 2 | from datetime import datetime
 3 | from icecream import ic
 4 | import time
 5 | from datetime import datetime
 6 | 
 7 | message = "I don't have prefix"
 8 | ic(message)
 9 | 
10 | def time_format():
11 |     return f'{datetime.now()}|> '
12 | 
13 | ic.configureOutput(prefix=time_format)
14 | for _ in range(3):
15 |     time.sleep(1)
16 |     ic('Hello')
17 | 
18 | """
19 | ic| message: "I don't have prefix"
20 | 2021-06-20 08:12:21.777664|> 'Hello'
21 | 2021-06-20 08:12:22.780577|> 'Hello'
22 | 2021-06-20 08:12:23.782396|> 'Hello'
23 | """


--------------------------------------------------------------------------------
/code_snippets/python/classmethod_example.py:
--------------------------------------------------------------------------------
 1 | class Solver:
 2 |     def __init__(self, nums: list):
 3 |         self.nums = nums
 4 |     
 5 |     @classmethod
 6 |     def get_even(cls, nums: list):
 7 |         return cls([num for num in nums if num % 2 == 0])
 8 |     def print_output(self):
 9 |         print("Result:", self.nums)
10 | 
11 | # Not using class method       
12 | nums = [1, 2, 3, 4, 5, 6, 7]
13 | solver = Solver(nums).print_output()
14 | """
15 | Result: [1, 2, 3, 4, 5, 6, 7]
16 | """
17 | 
18 | solver2 = Solver.get_even(nums)
19 | solver2.print_output()
20 | """
21 | Result: [2, 4, 6]
22 | """
23 | 


--------------------------------------------------------------------------------
/code_snippets/cool_tools/pygithub_example.py:
--------------------------------------------------------------------------------
 1 | from github import Github 
 2 | 
 3 | # g = Github('user', 'password') # use username and password
 4 | g = Github('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') # or use an access token
 5 | 
 6 | for i, repo in enumerate(g.search_topics('machine learning')):
 7 |     if i < 10:
 8 |         print(repo.name)
 9 | 
10 | """
11 | machine-learning
12 | deep-learning
13 | scikit-learn
14 | jupyter-notebook
15 | scikitlearn-machine-learning
16 | coursera
17 | unsupervised-machine-learning
18 | supervised-machine-learning
19 | coursera-machine-learning
20 | adversarial-machine-learning
21 | """


--------------------------------------------------------------------------------
/code_snippets/python/argparse_example.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | 
 3 | parser = argparse.ArgumentParser()
 4 | 
 5 | # Add optional argument
 6 | parser.add_argument(
 7 |     "-p",
 8 |     "--Parameter",
 9 |     default=4,  # default value
10 |     type=int,  # data type
11 |     help="Choose the parameter",
12 | )  # description
13 | 
14 | # Read arguments on the command line
15 | args = parser.parse_args()
16 | print(f'Your chosen parameter is {args.Parameter}')
17 | 
18 | """On your terminal
19 | $ python argparse_example.py
20 | Your chosen parameter is 4
21 | 
22 | $ python argparse_example.py -p 5
23 | Your chosen parameter is 5
24 | """


--------------------------------------------------------------------------------
/code_snippets/python/decorator_example.py:
--------------------------------------------------------------------------------
 1 | import time 
 2 | def time_func(func):
 3 |     def wrapper():
 4 |         print("This happens before the function is called")
 5 |         start = time.time()
 6 |         func()
 7 |         print('This happens after the funciton is called')
 8 |         end = time.time()
 9 |         print('The duration is', end - start, 's')
10 | 
11 |     return wrapper
12 | 
13 | @time_func
14 | def say_hello():
15 |     print("hello")
16 | 
17 | say_hello()
18 | """ 
19 | This happens before the function is called
20 | hello
21 | This happens after the funciton is called
22 | The duration is 4.0531158447265625e-06 s
23 | """


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/add_datepart_example.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd 
 2 | from fastai.tabular.core import add_datepart
 3 | from datetime import datetime
 4 | 
 5 | df = pd.DataFrame({'date': [datetime(2020, 2, 5), datetime(2020, 2, 6),
 6 |                             datetime(2020, 2, 7), datetime(2020, 2, 8)],
 7 |                 'val': [1, 2, 3, 4]})
 8 | 
 9 | df = add_datepart(df, 'date')
10 | print(df.columns)                
11 | """ 
12 | Index(['val', 'Year', 'Month', 'Week', 'Day', 'Dayofweek', 'Dayofyear',
13 |        'Is_month_end', 'Is_month_start', 'Is_quarter_end', 'Is_quarter_start',
14 |        'Is_year_end', 'Is_year_start', 'Elapsed'],
15 |       dtype='object')
16 | """


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/autoscraper_example.py:
--------------------------------------------------------------------------------
 1 | # pip install autoscraper
 2 | 
 3 | from autoscraper import AutoScraper
 4 | 
 5 | url = 'https://stackoverflow.com/questions/2081586/web-scraping-with-python'
 6 | 
 7 | wanted_list = ['How to check version of python modules?']
 8 | 
 9 | scraper = AutoScraper()
10 | result = scraper.build(url, wanted_list)
11 | 
12 | for res in result:
13 |     print(res)
14 | """ 
15 | How to execute a program or call a system command from Python
16 | What are metaclasses in Python?
17 | Does Python have a ternary conditional operator?
18 | Convert bytes to a string
19 | Does Python have a string 'contains' substring method?
20 | How to check version of python modules?
21 | """


--------------------------------------------------------------------------------
/code_snippets/cool_tools/pyfiglet_example.py:
--------------------------------------------------------------------------------
 1 | # pip install pyfiglet
 2 | # pip install termcolor
 3 | 
 4 | import pyfiglet
 5 | from termcolor import colored, cprint
 6 | 
 7 | out = pyfiglet.figlet_format("Hello")
 8 | print(out)
 9 | """ 
10 |     __  __     ____    
11 |    / / / /__  / / /___ 
12 |   / /_/ / _ \/ / / __ \
13 |  / __  /  __/ / / /_/ /
14 | /_/ /_/\___/_/_/\____/ 
15 | """
16 | 
17 | out = pyfiglet.figlet_format("Hello", font='slant')
18 | print(out)
19 | 
20 | cprint(pyfiglet.figlet_format('Hello', font='bell'), 'blue')
21 | """ 
22 |  __  __         .    .         
23 |  |   |    ___   |    |     __. 
24 |  |___|  .'   `  |    |   .'   \
25 |  |   |  |----'  |    |   |    |
26 |  /   /  `.___, /\__ /\__  `._.'
27 | """


--------------------------------------------------------------------------------
/code_snippets/pandas/df_merge.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd 
 2 | 
 3 | df1 = pd.DataFrame({'left_key': [1, 2, 3], 'a': [4, 5, 6]})
 4 | df2 = pd.DataFrame({'right_key': [1, 2, 3], 'a': [5, 6, 7]})
 5 | print(df1.merge(df2, left_on='left_key', right_on='right_key'))
 6 | """ 
 7 |    left_key  a_x  right_key  a_y
 8 | 0         1    4          1    5
 9 | 1         2    5          2    6
10 | 2         3    6          3    7
11 | """
12 | 
13 | print(df1.merge(df2, left_on='left_key', right_on='right_key',
14 |         suffixes=('_left', '_right')))
15 | """ 
16 |    left_key  a_left  right_key  a_right
17 | 0         1       4          1        5
18 | 1         2       5          2        6
19 | 2         3       6          3        7
20 | """
21 | 


--------------------------------------------------------------------------------
/code_snippets/pandas/df_rolling.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | from datetime import date
 3 | 
 4 | df = pd.DataFrame({'date': [date(2021, 1, 20), date(2021, 1, 21), date(2021, 1, 22),
 5 |                             date(2021, 1, 23), date(2021, 1, 24)],
 6 |                     'value': [1, 2, 3, 4, 5]}).set_index('date')
 7 | 
 8 | print(df)
 9 | """ 
10 |             value
11 | date             
12 | 2021-01-20      1
13 | 2021-01-21      2
14 | 2021-01-22      3
15 | 2021-01-23      4
16 | 2021-01-24      5
17 | """
18 | 
19 | print(df.rolling(3).mean())
20 | """ 
21 |             value
22 | date             
23 | 2021-01-20    NaN
24 | 2021-01-21    NaN
25 | 2021-01-22    2.0
26 | 2021-01-23    3.0
27 | 2021-01-24    4.0
28 | """
29 | 


--------------------------------------------------------------------------------
/code_snippets/python/itertools_examples.py:
--------------------------------------------------------------------------------
 1 | from itertools import product
 2 | 
 3 | params = {
 4 |     "learning_rate": [1e-1, 1e-2, 1e-3],
 5 |     "batch_size": [16, 32, 64],
 6 | }
 7 | 
 8 | for vals in product(*params.values()):
 9 |     combination = dict(zip(params.keys(), vals))
10 |     print(combination)
11 | """
12 | {'learning_rate': 0.1, 'batch_size': 16}
13 | {'learning_rate': 0.1, 'batch_size': 32}
14 | {'learning_rate': 0.1, 'batch_size': 64}
15 | {'learning_rate': 0.01, 'batch_size': 16}
16 | {'learning_rate': 0.01, 'batch_size': 32}
17 | {'learning_rate': 0.01, 'batch_size': 64}
18 | {'learning_rate': 0.001, 'batch_size': 16}
19 | {'learning_rate': 0.001, 'batch_size': 32}
20 | {'learning_rate': 0.001, 'batch_size': 64}
21 | """
22 | 


--------------------------------------------------------------------------------
/code_snippets/cool_tools/getme_forecast.sh:
--------------------------------------------------------------------------------
 1 | getme forecast -f
 2 | 
 3 | # Sample output, kept as a block comment: ":" is the shell no-op command and
 4 | # the quoted text is its ignored argument. The space after ":" is required;
 5 | # without it the shell tries to run ":<text>" as a command name.
 6 | : '
 7 | Collinsville, United States
 8 | 
 9 | Mon Jun 21 2021  |  12 h
10 | ☁️  Clouds | Overcast clouds
11 | Temperature 65.14 °F
12 | Min. 65.14 °F | Max. 65.14 °F
13 | 
14 | 
15 | Tue Jun 22 2021  |  12 h
16 | ☀️  Clear | Clear sky
17 | Temperature 54.16 °F
18 | Min. 54.16 °F | Max. 54.16 °F
19 | 
20 | 
21 | Wed Jun 23 2021  |  12 h
22 | ☁️  Clouds | Scattered clouds
23 | Temperature 57.18 °F
24 | Min. 57.18 °F | Max. 57.18 °F
25 | 
26 | 
27 | Thu Jun 24 2021  |  12 h
28 | ☀️  Clear | Clear sky
29 | Temperature 63.93 °F
30 | Min. 63.93 °F | Max. 63.93 °F
31 | 
32 | 
33 | Fri Jun 25 2021  |  12 h
34 | 🌦️  Rain | Moderate rain
35 | Temperature 71.78 °F
36 | Min. 71.78 °F | Max. 71.78 °F
37 | '


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/docopt_example.py:
--------------------------------------------------------------------------------
 1 | """Extract keywords of an input file
 2 | 
 3 | Usage:
 4 |     docopt_example.py --data-dir=<data-directory> [--input-path=<path>]
 5 | 
 6 | Options:
 7 |     --data-dir=<path>    Directory of the data
 8 |     --input-path=<path>  Name of the input file [default: input_text.txt]
 9 | """
10 | 
11 | from docopt import docopt 
12 | 
13 | if __name__ == '__main__':
14 |     args = docopt(__doc__, argv=None, help=True)
15 |     data_dir = args['--data-dir']
16 |     input_path = args['--input-path']
17 | 
18 |     if data_dir:
19 |         print(f"Extracting keywords from {data_dir}/{input_path}")
20 | 
21 | """ 
22 | $ python docopt_example.py --data-dir=Data
23 | Extracting keywords from Data/input_text.txt
24 | """


--------------------------------------------------------------------------------
/code_snippets/pandas/reduce_memory.py:
--------------------------------------------------------------------------------
 1 | from sklearn.datasets import load_iris
 2 | import pandas as pd 
 3 | 
 4 | X, y = load_iris(as_frame=True, return_X_y=True)
 5 | df = pd.concat([X, pd.DataFrame(y, columns=['target'])], axis=1)
 6 | print(df.memory_usage())
 7 | """ 
 8 | Index                 128
 9 | sepal length (cm)    1200
10 | sepal width (cm)     1200
11 | petal length (cm)    1200
12 | petal width (cm)     1200
13 | target               1200
14 | dtype: int64
15 | """ 
16 | df['target'] = df['target'].astype('category')
17 | print(df.memory_usage())
18 | """ 
19 | Index                 128
20 | sepal length (cm)    1200
21 | sepal width (cm)     1200
22 | petal length (cm)    1200
23 | petal width (cm)     1200
24 | target                282
25 | dtype: int64
26 | """
27 | 


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/datapane_example.py:
--------------------------------------------------------------------------------
 1 | # pip install datapane 
 2 | # pip install plotly
 3 | 
 4 | import datapane as dp 
 5 | import pandas as pd 
 6 | import numpy as np 
 7 | import plotly.express as px
 8 | 
 9 | # Scripts to create df and chart 
10 | df = px.data.gapminder()
11 | 
12 | chart = px.scatter(df.query("year==2007"), x="gdpPercap", y="lifeExp",
13 | 	         size="pop", color="continent",
14 |                  hover_name="country", log_x=True, size_max=60)
15 | 
16 | # Once you have the df and the chart, simply use
17 | r = dp.Report(
18 |     dp.Text("my simple report"), # add description
19 |     dp.DataTable(df), # create a table
20 |     dp.Plot(chart) # create a chart
21 | )
22 | 
23 | # Publish your report
24 | r.publish(name='example', visibility = dp.Visibility.PUBLIC)


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/textblob_example.py:
--------------------------------------------------------------------------------
 1 | # pip install textblob
 2 | # python -m textblob.download_corpora
 3 | 
 4 | from textblob import TextBlob
 5 | 
 6 | text = "Today is a beautiful day"
 7 | blob = TextBlob(text)
 8 | 
 9 | print(blob.words) # Word tokenization
10 | # ['Today', 'is', 'a', 'beautiful', 'day']
11 | 
12 | print(blob.noun_phrases) # Noun phrase extraction
13 | # ['beautiful day']
14 | 
15 | print(blob.sentiment) # Sentiment analysis
16 | # Sentiment(polarity=0.85, subjectivity=1.0)
17 | 
18 | print(blob.word_counts) # Word counts
19 | # defaultdict(<class 'int'>, {'today': 1, 'is': 1, 'a': 1, 'beautiful': 1, 'day': 1})
20 | 
21 | # Spelling correction
22 | text = "Today is a beutiful day"
23 | blob = TextBlob(text)
24 | print(blob.correct())
25 | # Today is a beautiful day
26 | 


--------------------------------------------------------------------------------
/code_snippets/python/property_decorator.py:
--------------------------------------------------------------------------------
 1 | class Fruit:
 2 |     def __init__(self, name: str, color: str):
 3 |         self._name = name 
 4 |         self._color = color
 5 |         
 6 |     @property
 7 |     def color(self):
 8 |         print("The color of the fruit is:")
 9 |         return self._color 
10 |     
11 |     @color.setter
12 |     def color(self, value):
13 |         print("Setting value of color...")
14 |         if self._color is None:
15 |             if not isinstance(value, str):
16 |                 raise ValueError('color must be of type string')
17 |             self.color = value 
18 |         else:
19 |             raise AttributeError("Sorry, you cannot change a fruit's color!")
20 |         
21 | fruit = Fruit('apple', 'red')
22 | print(fruit.color)    
23 | fruit.color = 'yellow'
24 | 


--------------------------------------------------------------------------------
/code_snippets/pandas/dataframe_pipe.py:
--------------------------------------------------------------------------------
 1 | # pip install textblob
 2 | import pandas as pd
 3 | from textblob import TextBlob
 4 | 
 5 | def remove_white_space(df: pd.DataFrame):
 6 |     df['text'] = df['text'].apply(lambda row: row.strip())
 7 |     return df
 8 | 
 9 | def get_sentiment(df: pd.DataFrame):
10 |     df['sentiment'] = df['text'].apply(lambda row:
11 |                                     TextBlob(row).sentiment[0])
12 |     return df
13 | 
14 | df = pd.DataFrame({'text': ["It is a beautiful day today  ",
15 |                         "  This movie is terrible"]})
16 | 
17 | df = (df.pipe(remove_white_space)
18 |     .pipe(get_sentiment)
19 | )
20 | 
21 | print(df)
22 | """
23 |                           text  sentiment
24 | 0  It is a beautiful day today       0.85
25 | 1       This movie is terrible      -1.00
26 | """


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/causalimpact_example.py:
--------------------------------------------------------------------------------
 1 | # pip install pycausalimpact
 2 | 
 3 | import numpy as np
 4 | import pandas as pd 
 5 | from statsmodels.tsa.arima_process import ArmaProcess
 6 | from causalimpact import CausalImpact
 7 | 
 8 | # Generate random sample
 9 | 
10 | np.random.seed(0)
11 | ar = np.r_[1, 0.9]
12 | ma = np.array([1])
13 | arma_process = ArmaProcess(ar, ma)
14 | 
15 | X = 50 + arma_process.generate_sample(nsample=1000)
16 | y = 1.6 * X + np.random.normal(size=1000)
17 | 
18 | # There is a change starting from index 800
19 | y[800:] += 10
20 | 
21 | data = pd.DataFrame({'y': y, 'X': X}, columns=['y', 'X'])
22 | pre_period = [0, 799]
23 | post_period = [800, 999]
24 | 
25 | ci = CausalImpact(data, pre_period, post_period)
26 | print(ci.summary())
27 | print(ci.summary(output='report'))
28 | ci.plot()


--------------------------------------------------------------------------------
/code_snippets/cool_tools/decorator_module.py:
--------------------------------------------------------------------------------
 1 | from decorator import decorator
 2 | from time import time, sleep
 3 | 
 4 | def time_func(func):
 5 |     def wrapper(*args, **kwargs):
 6 |         start_time = time()
 7 |         func(*args, **kwargs)
 8 |         end_time = time()
 9 |         print(
10 |             f"""It takes {round(end_time - start_time, 3)} seconds to execute the function"""
11 |         )
12 |     return wrapper 
13 | 
14 | @decorator
15 | def time_func_with_decorator(func, *args, **kwargs):
16 |     start_time = time()
17 |     func(*args, **kwargs)
18 |     end_time = time()
19 |     print(
20 |             f"""It takes {round(end_time - start_time, 3)} seconds to execute the function"""
21 |         )
22 | @time_func_with_decorator  
23 | def test_func():
24 |     sleep(1)
25 |     
26 | if __name__== '__main__':
27 |     test_func()
28 | 


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/swifter_example.py:
--------------------------------------------------------------------------------
 1 | # pip install swifter
 2 | from time import time
 3 | from sklearn.datasets import fetch_california_housing
 4 | from scipy.special import boxcox1p
 5 | import swifter
 6 | import timeit
 7 | 
 8 | X, y = fetch_california_housing(return_X_y=True, as_frame=True)
 9 | 
10 | def pandas_apply():
11 |     X["AveRooms"].apply(lambda x: boxcox1p(x, 0.25))
12 | 
13 | 
14 | def swifter_apply():
15 |     X["AveRooms"].swifter.apply(lambda x: boxcox1p(x, 0.25))
16 | 
17 | num_experiments = 100
18 | pandas_time = timeit.timeit(pandas_apply, number=num_experiments)
19 | swifter_time = timeit.timeit(swifter_apply, number=num_experiments)
20 | 
21 | pandas_vs_swifter = round(pandas_time/swifter_time, 2)
22 | print(f'Swifter apply is {pandas_vs_swifter} times faster than Pandas apply')
23 | # Swifter apply is 12.54 times faster than Pandas apply


--------------------------------------------------------------------------------
/code_snippets/python/heapq_example.py:
--------------------------------------------------------------------------------
 1 | import heapq
 2 | import random
 3 | from timeit import timeit
 4 | 
 5 | random.seed(0)
 6 | l = random.sample(range(0, 10000), 10000)
 7 | 
 8 | def get_n_max_sorting(l: list, n: int):
 9 |     l = sorted(l, reverse=True)
10 |     return l[:n]
11 | 
12 | def get_n_max_heapq(l: list, n: int):
13 |     return heapq.nlargest(n, l)
14 | 
15 | expSize = 1000
16 | n = 100
17 | time_sorting = timeit("get_n_max_sorting(l, n)", number=expSize,
18 |                         globals=globals())
19 | time_heapq = timeit('get_n_max_heapq(l, n)', number=expSize,
20 |                     globals=globals())
21 | 
22 | ratio = round(time_sorting/time_heapq, 3)
23 | print(f'Run {expSize} experiments. Using heapq is {ratio} times'
24 | 'faster than using sorting')
25 | """ 
26 | Run 1000 experiments. Using heapq is 2.659 timesfaster than using sorting
27 | """


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/pipeline_gridsearchcv.py:
--------------------------------------------------------------------------------
 1 | from sklearn.model_selection import train_test_split, GridSearchCV
 2 | from sklearn.preprocessing import StandardScaler
 3 | from sklearn.pipeline import make_pipeline
 4 | from sklearn.svm import SVC 
 5 | from sklearn.datasets import load_iris 
 6 | 
 7 | # load data
 8 | df = load_iris()
 9 | X = df.data
10 | y = df.target 
11 | 
12 | # split data into train and test
13 | X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
14 | 
15 | # Create a pipeline variable
16 | make_pipe = make_pipeline(StandardScaler(), SVC())
17 | 
18 | # Defining parameters grid
19 | grid_params = {'svc__C': [0.1, 1, 10, 100, 1000], 'svc__gamma': [0.1, 1, 10, 100]}
20 | 
21 | # hypertuning
22 | grid = GridSearchCV(make_pipe, grid_params, cv = 5)
23 | grid.fit(X_train, y_train)
24 | 
25 | # predict
26 | y_pref = grid.predict(X_test)


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/spacy_ngram.py:
--------------------------------------------------------------------------------
 1 | # pip install textacy
 2 | # pip install spacy
 3 | # python -m spacy download en_core_web_sm
 4 | 
 5 | import pandas as pd 
 6 | import spacy 
 7 | from textacy.extract import ngrams
 8 | 
 9 | nlp = spacy.load('en_core_web_sm')
10 | 
11 | text = nlp('Data science is an inter-disciplinary field that uses'
12 | ' scientific methods, processes, algorithms, and systme to extract'
13 | ' knowledge and insights from many structural and unstructured data.')
14 | 
15 | n_grams = 2 # contiguous sequence of a word
16 | min_freq = 1 # extract n -gram based on its frequency
17 | 
18 | print(pd.Series([n.text for n in ngrams(text, n=n_grams, min_freq=1)]).value_counts())
19 | """ 
20 | disciplinary field    1
21 | scientific methods    1
22 | unstructured data     1
23 | Data science          1
24 | extract knowledge     1
25 | uses scientific       1
26 | """


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/pandera_example.py:
--------------------------------------------------------------------------------
 1 | # pip install pandera 
 2 | 
 3 | import pandera as pa
 4 | from pandera import check_input
 5 | import pandas as pd
 6 | 
 7 | df = pd.DataFrame({'col1': [5.0, 8.0, 10.0],
 8 | 					'col2': ['text_1', 'text_2', 'text_3']})
 9 | schema = pa.DataFrameSchema({
10 | 		 "col1": pa.Column(float, pa.Check(lambda minute: 5 <= minute)),
11 | 		 "col2": pa.Column(str, pa.Check.str_startswith("text_"))
12 | })
13 | validated_df = schema(df)
14 | print(validated_df)
15 | """
16 |    col1    col2
17 | 0   5.0  text_1
18 | 1   8.0  text_2
19 | 2  10.0  text_3
20 | """
21 | 
22 | @check_input(schema)
23 | def plus_three(df):
24 | 	df['col1_plus_3'] = df['col1'] + 3
25 | 	return df 
26 | 
27 | print(plus_three(df))
28 | """ 
29 |    col1    col2  col1_plus_3
30 | 0   5.0  text_1          8.0
31 | 1   8.0  text_2         11.0
32 | 2  10.0  text_3         13.0
33 | """


--------------------------------------------------------------------------------
/code_snippets/pandas/select_dtypes.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd 
 2 | 
 3 | df = pd.DataFrame({'col1': ['a', 'b', 'c'], 'col2': [1, 2, 3],
 4 |                     'col3': [0.1, 0.2, 0.3]})
 5 | 
 6 | print(df.info())
 7 | """ 
 8 | <class 'pandas.core.frame.DataFrame'>
 9 | RangeIndex: 3 entries, 0 to 2
10 | Data columns (total 3 columns):
11 |  #   Column  Non-Null Count  Dtype  
12 | ---  ------  --------------  -----  
13 |  0   col1    3 non-null      object 
14 |  1   col2    3 non-null      int64  
15 |  2   col3    3 non-null      float64
16 | dtypes: float64(1), int64(1), object(1)
17 | memory usage: 200.0+ bytes
18 | """
19 | 
20 | print(df.select_dtypes(include=['int64', 'float64']))
21 | """ 
22 |    col2  col3
23 | 0     1   0.1
24 | 1     2   0.2
25 | 2     3   0.3
26 | """ 
27 | 
28 | print(df.select_dtypes(exclude=['object']))
29 | """
30 |    col2  col3
31 | 0     1   0.1
32 | 1     2   0.2
33 | 2     3   0.3
34 | """


--------------------------------------------------------------------------------
/code_snippets/cool_tools/notion_example.py:
--------------------------------------------------------------------------------
 1 | # pip install notion
 2 | 
 3 | from notion.client import NotionClient
 4 | from notion.block import NumberedListBlock
 5 | 
 6 | client = NotionClient("<token_v2>")
 7 | 
 8 | page = client.get_block("https://www.notion.so/Error-shooting-How-to-3c1904c9869b47118b8656df8b2f8d11")
 9 | 
10 | print(page.title)
11 | # Error shooting / How to
12 | 
13 | page.title = "How to / Error shooting"
14 | 
15 | print(page.title)
16 | # How to / Error shooting
17 | 
18 | for child in page.children:
19 |     print(child.title)
20 | 
21 | """
22 | Linux
23 | Changes to the system
24 | Hydra
25 | Python
26 | WordPress
27 | """
28 | 
29 | new = page.children.add_new(
30 |     NumberedListBlock, title='Item 1'
31 | )
32 | 
33 | for child in page.children:
34 |     print(child.title)
35 | 
36 | """
37 | Linux
38 | Changes to the system
39 | Hydra
40 | Python
41 | WordPress
42 | Item 1
43 | """
44 | 
45 | 
46 | 
47 | 


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/kedro_example.py:
--------------------------------------------------------------------------------
 1 | from kedro.pipeline import node, Pipeline
 2 | from kedro.io import DataCatalog, MemoryDataSet
 3 | from kedro.runner import SequentialRunner
 4 | 
 5 | # Prepare a data catalog
 6 | data_catalog = DataCatalog({"data.csv": MemoryDataSet()})
 7 | 
 8 | # Prepare first node
 9 | def process_data():
10 |     return f"processed data"
11 | 
12 | process_data_node = node(
13 |     func=process_data, inputs=None, outputs="processed_data"
14 | )
15 | 
16 | def train_model(data: str):
17 |     return f"Training model using {data}"
18 | 
19 | train_model_node = node(
20 |     func=train_model, inputs="processed_data", outputs="trained_model"
21 | )
22 | 
23 | # Assemble nodes into a pipeline
24 | pipeline = Pipeline([process_data_node, train_model_node])
25 | 
26 | # Create a runner to run the pipeline
27 | runner = SequentialRunner()
28 | print(runner.run(pipeline, data_catalog))
29 | # {'trained_model': 'Training model using processed data'}


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/fastai_df_shrink.py:
--------------------------------------------------------------------------------
 1 | # pip install fastai
 2 | 
 3 | from fastai.tabular.core import df_shrink
 4 | import pandas as pd 
 5 | 
 6 | df = pd.DataFrame({"col1": [1, 2, 3], "col2": [1.0, 2.0, 3.0]})
 7 | print(df.info())
 8 | """
 9 | <class 'pandas.core.frame.DataFrame'>
10 | RangeIndex: 3 entries, 0 to 2
11 | Data columns (total 2 columns):
12 |  #   Column  Non-Null Count  Dtype  
13 | ---  ------  --------------  -----  
14 |  0   col1    3 non-null      int64  
15 |  1   col2    3 non-null      float64
16 | dtypes: float64(1), int64(1)
17 | memory usage: 176.0 bytes
18 | """ 
19 | 
20 | new_df = df_shrink(df)
21 | print(new_df.info())
22 | """
23 | <class 'pandas.core.frame.DataFrame'>
24 | RangeIndex: 3 entries, 0 to 2
25 | Data columns (total 2 columns):
26 |  #   Column  Non-Null Count  Dtype  
27 | ---  ------  --------------  -----  
28 |  0   col1    3 non-null      int8   
29 |  1   col2    3 non-null      float32
30 | dtypes: float32(1), int8(1)
31 | memory usage: 143.0 bytes
32 | """


--------------------------------------------------------------------------------
/code_snippets/cool_tools/isort_example.py:
--------------------------------------------------------------------------------
 1 | # Befort isort
 2 | from sklearn.metrics import confusion_matrix, fl_score, classification_report, roc_curve
 3 | from sklearn.model_selection import train_test_split
 4 | from sklearn.model_selection import GridSearchCV, StratifiedKFold
 5 | from sklearn import svm
 6 | from sklearn.naive_bayes import GaussianNB, MultinomialNB
 7 | from sklearn.neighbors import KNeighborsClassifier
 8 | from sklearn.tree import DecisionTreeClassifier
 9 | from sklearn.model_selection import TimeSeriesSplit
10 | 
11 | # On your terminal: isort name_of_your_file.py 
12 | from sklearn import svm
13 | from sklearn.metrics import (classification_report, confusion_matrix, fl_score,
14 |                              roc_curve)
15 | from sklearn.model_selection import (GridSearchCV, StratifiedKFold,
16 |                                      TimeSeriesSplit, train_test_split)
17 | from sklearn.naive_bayes import GaussianNB, MultinomialNB
18 | from sklearn.neighbors import KNeighborsClassifier
19 | from sklearn.tree import DecisionTreeClassifier
20 | 


--------------------------------------------------------------------------------
/code_snippets/python/singledispatch_example.py:
--------------------------------------------------------------------------------
 1 | from functools import singledispatch
 2 | 
 3 | # Without singledispatch
 4 | 
 5 | 
 6 | def process_data(data):
 7 |     if isinstance(data, dict):
 8 |         process_dict(data)
 9 | 
10 |     else:
11 |         process_list(data)
12 | 
13 | 
14 | def process_dict(data: dict):
15 |     print("Dict is processed")
16 | 
17 | 
18 | def process_list(data: list):
19 |     print("List is processed")
20 | 
21 | 
22 | # ---------------------------------------------------------------------------- #
23 | # With singledispatch
24 | 
25 | 
26 | @singledispatch
27 | def process_data2(data):
28 |     raise NotImplementedError("Please implement process_data2")
29 | 
30 | 
31 | @process_data2.register
32 | def process_dict2(data: dict):
33 |     print("Dict is processed")
34 | 
35 | 
36 | @process_data2.register
37 | def process_list2(data: list):
38 |     print("List is processed")
39 | 
40 | 
41 | data = {"a": [1, 2, 3], "b": [4, 5, 6]}
42 | data2 = [{"a": [1, 2, 3]}, {"b": [4, 5, 6]}]
43 | 
44 | process_data2(data)
45 | """Dict is processed"""
46 | 
47 | process_data2(data2)
48 | """List is processed"""
49 | 


--------------------------------------------------------------------------------
/code_snippets/notebook/display_math_equations.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "metadata": {
 3 |   "language_info": {
 4 |    "codemirror_mode": {
 5 |     "name": "ipython",
 6 |     "version": 3
 7 |    },
 8 |    "file_extension": ".py",
 9 |    "mimetype": "text/x-python",
10 |    "name": "python",
11 |    "nbconvert_exporter": "python",
12 |    "pygments_lexer": "ipython3",
13 |    "version": "3.8.1"
14 |   },
15 |   "orig_nbformat": 2,
16 |   "kernelspec": {
17 |    "name": "pythonjvsc74a57bd0a09dca2afe15d58e0a8e12d0967f3bf0e997b36a4eb26c84cbca546ff6ad81d4",
18 |    "display_name": "Python 3.8.1  ('venv': venv)"
19 |   },
20 |   "metadata": {
21 |    "interpreter": {
22 |     "hash": "a09dca2afe15d58e0a8e12d0967f3bf0e997b36a4eb26c84cbca546ff6ad81d4"
23 |    }
24 |   }
25 |  },
26 |  "nbformat": 4,
27 |  "nbformat_minor": 2,
28 |  "cells": [
29 |   {
30 |    "cell_type": "code",
31 |    "execution_count": 1,
32 |    "metadata": {},
33 |    "outputs": [
34 |     {
35 |      "output_type": "stream",
36 |      "name": "stdout",
37 |      "text": [
38 |       "The equation is:\n"
39 |      ]
40 |     },
41 |     {
42 |      "output_type": "display_data",
43 |      "data": {
44 |       "text/plain": "<IPython.core.display.Math object>",
45 |       "text/latex": "$\\displaystyle y= 3x+5$"
46 |      },
47 |      "metadata": {}
48 |     }
49 |    ],
50 |    "source": [
51 |     "from IPython.display import display, Math, Latex\n",
52 |     "\n",
53 |     "a = 3\n",
54 |     "b = 5\n",
55 |     "print(\"The equation is:\")\n",
56 |     "display(Math(f'y= {a}x+{b}'))"
57 |    ]
58 |   },
59 |   {
60 |    "cell_type": "code",
61 |    "execution_count": null,
62 |    "metadata": {},
63 |    "outputs": [],
64 |    "source": []
65 |   }
66 |  ]
67 | }


--------------------------------------------------------------------------------
/code_snippets/numpy/array_to_latex_example.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "id": "serial-beatles",
  7 |    "metadata": {
  8 |     "ExecuteTime": {
  9 |      "end_time": "2021-06-23T13:01:26.090538Z",
 10 |      "start_time": "2021-06-23T13:01:20.319340Z"
 11 |     }
 12 |    },
 13 |    "outputs": [],
 14 |    "source": [
 15 |     "pip install array_to_latex"
 16 |    ]
 17 |   },
 18 |   {
 19 |    "cell_type": "code",
 20 |    "execution_count": 3,
 21 |    "id": "terminal-tower",
 22 |    "metadata": {
 23 |     "ExecuteTime": {
 24 |      "end_time": "2021-06-23T13:01:31.648086Z",
 25 |      "start_time": "2021-06-23T13:01:31.422198Z"
 26 |     }
 27 |    },
 28 |    "outputs": [
 29 |     {
 30 |      "name": "stdout",
 31 |      "output_type": "stream",
 32 |      "text": [
 33 |       "\\begin{bmatrix}\n",
 34 |       "  1.00 &  2.00 &  3.00\\\\\n",
 35 |       "  4.00 &  5.00 &  6.00\n",
 36 |       "\\end{bmatrix}\n"
 37 |      ]
 38 |     }
 39 |    ],
 40 |    "source": [
 41 |     "import numpy as np \n",
 42 |     "import array_to_latex as a2l \n",
 43 |     "\n",
 44 |     "a = np.array([[1, 2, 3], [4, 5, 6]])\n",
 45 |     "latex = a2l.to_ltx(a)\n",
 46 |     "latex"
 47 |    ]
 48 |   },
 49 |   {
 50 |    "cell_type": "markdown",
 51 |    "id": "parallel-revelation",
 52 |    "metadata": {},
 53 |    "source": [
 54 |     "\\begin{bmatrix}\n",
 55 |     "  1.00 &  2.00 &  3.00\\\\\n",
 56 |     "  4.00 &  5.00 &  6.00\n",
 57 |     "\\end{bmatrix}"
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "code",
 62 |    "execution_count": null,
 63 |    "id": "romance-louisiana",
 64 |    "metadata": {},
 65 |    "outputs": [],
 66 |    "source": []
 67 |   }
 68 |  ],
 69 |  "metadata": {
 70 |   "kernelspec": {
 71 |    "display_name": "Python 3",
 72 |    "language": "python",
 73 |    "name": "python3"
 74 |   },
 75 |   "language_info": {
 76 |    "codemirror_mode": {
 77 |     "name": "ipython",
 78 |     "version": 3
 79 |    },
 80 |    "file_extension": ".py",
 81 |    "mimetype": "text/x-python",
 82 |    "name": "python",
 83 |    "nbconvert_exporter": "python",
 84 |    "pygments_lexer": "ipython3",
 85 |    "version": "3.8.1"
 86 |   },
 87 |   "toc": {
 88 |    "base_numbering": 1,
 89 |    "nav_menu": {},
 90 |    "number_sections": true,
 91 |    "sideBar": true,
 92 |    "skip_h1_title": false,
 93 |    "title_cell": "Table of Contents",
 94 |    "title_sidebar": "Contents",
 95 |    "toc_cell": false,
 96 |    "toc_position": {},
 97 |    "toc_section_display": true,
 98 |    "toc_window_display": false
 99 |   }
100 |  },
101 |  "nbformat": 4,
102 |  "nbformat_minor": 5
103 | }
104 | 


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/compare_2_datasets/compare_datasets.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "code",
 5 |    "execution_count": null,
 6 |    "id": "consolidated-rabbit",
 7 |    "metadata": {
 8 |     "ExecuteTime": {
 9 |      "end_time": "2021-06-20T18:58:20.385118Z",
10 |      "start_time": "2021-06-20T18:58:14.166126Z"
11 |     }
12 |    },
13 |    "outputs": [],
14 |    "source": [
15 |     "pip install sweetviz"
16 |    ]
17 |   },
18 |   {
19 |    "cell_type": "code",
20 |    "execution_count": 3,
21 |    "id": "japanese-observer",
22 |    "metadata": {
23 |     "ExecuteTime": {
24 |      "end_time": "2021-06-20T18:58:32.833352Z",
25 |      "start_time": "2021-06-20T18:58:30.121608Z"
26 |     }
27 |    },
28 |    "outputs": [
29 |     {
30 |      "data": {
31 |       "application/vnd.jupyter.widget-view+json": {
32 |        "model_id": "7d5e393d9b13423d8134501d93ca1734",
33 |        "version_major": 2,
34 |        "version_minor": 0
35 |       },
36 |       "text/plain": [
37 |        "                                             |          | [  0%]   00:00 -> (? left)"
38 |       ]
39 |      },
40 |      "metadata": {},
41 |      "output_type": "display_data"
42 |     },
43 |     {
44 |      "name": "stdout",
45 |      "output_type": "stream",
46 |      "text": [
47 |       "Report SWEETVIZ_REPORT.html was generated! NOTEBOOK/COLAB USERS: the web browser MAY not pop up, regardless, the report IS saved in your notebook/colab files.\n"
48 |      ]
49 |     }
50 |    ],
51 |    "source": [
52 |     "from sklearn.datasets import load_iris\n",
53 |     "from sklearn.model_selection import train_test_split\n",
54 |     "import sweetviz as sv \n",
55 |     "\n",
56 |     "X, y = load_iris(return_X_y=True, as_frame=True)\n",
57 |     "X_train, X_test, y_train, y_test = train_test_split(X, y)\n",
58 |     "\n",
59 |     "report = sv.compare([X_train, 'train data'], [X_test, 'test data'])\n",
60 |     "report.show_html()"
61 |    ]
62 |   }
63 |  ],
64 |  "metadata": {
65 |   "kernelspec": {
66 |    "display_name": "Python 3",
67 |    "language": "python",
68 |    "name": "python3"
69 |   },
70 |   "language_info": {
71 |    "codemirror_mode": {
72 |     "name": "ipython",
73 |     "version": 3
74 |    },
75 |    "file_extension": ".py",
76 |    "mimetype": "text/x-python",
77 |    "name": "python",
78 |    "nbconvert_exporter": "python",
79 |    "pygments_lexer": "ipython3",
80 |    "version": "3.8.1"
81 |   },
82 |   "toc": {
83 |    "base_numbering": 1,
84 |    "nav_menu": {},
85 |    "number_sections": true,
86 |    "sideBar": true,
87 |    "skip_h1_title": false,
88 |    "title_cell": "Table of Contents",
89 |    "title_sidebar": "Contents",
90 |    "toc_cell": false,
91 |    "toc_position": {},
92 |    "toc_section_display": true,
93 |    "toc_window_display": false
94 |   }
95 |  },
96 |  "nbformat": 4,
97 |  "nbformat_minor": 5
98 | }
99 | 


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/mito_example/mito_example.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 10,
  6 |    "id": "aa6d4b81",
  7 |    "metadata": {
  8 |     "ExecuteTime": {
  9 |      "end_time": "2021-09-03T13:53:01.448473Z",
 10 |      "start_time": "2021-09-03T13:53:01.428519Z"
 11 |     }
 12 |    },
 13 |    "outputs": [],
 14 |    "source": [
 15 |     "from sklearn.datasets import load_iris\n",
 16 |     "\n",
 17 |     "data = load_iris(as_frame=True)\n",
 18 |     "data['data'].to_csv('iris.csv')"
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "code",
 23 |    "execution_count": 3,
 24 |    "id": "184e9cd1",
 25 |    "metadata": {
 26 |     "ExecuteTime": {
 27 |      "end_time": "2021-09-03T13:53:10.302258Z",
 28 |      "start_time": "2021-09-03T13:53:10.146695Z"
 29 |     }
 30 |    },
 31 |    "outputs": [
 32 |     {
 33 |      "data": {
 34 |       "application/vnd.jupyter.widget-view+json": {
 35 |        "model_id": "b1dcfcb3c6844bbaa1861f5ff0a91e63",
 36 |        "version_major": 2,
 37 |        "version_minor": 0
 38 |       },
 39 |       "text/plain": [
 40 |        "MitoWidget(analysis_name='UUID-47d43efb-3aaf-40d0-8d93-8b00df165284', code_json='{\"imports\": \"from mitosheet i…"
 41 |       ]
 42 |      },
 43 |      "metadata": {},
 44 |      "output_type": "display_data"
 45 |     }
 46 |    ],
 47 |    "source": [
 48 |     "import mitosheet\n",
 49 |     "mitosheet.sheet()"
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "code",
 54 |    "execution_count": null,
 55 |    "id": "ce01ae76-c0aa-4b49-839e-5511ae4dfd33",
 56 |    "metadata": {},
 57 |    "outputs": [],
 58 |    "source": []
 59 |   },
 60 |   {
 61 |    "cell_type": "code",
 62 |    "execution_count": null,
 63 |    "id": "5892beb5-cc54-4fad-a25e-acb572b06b9f",
 64 |    "metadata": {},
 65 |    "outputs": [],
 66 |    "source": []
 67 |   },
 68 |   {
 69 |    "cell_type": "code",
 70 |    "execution_count": null,
 71 |    "id": "8a39338b",
 72 |    "metadata": {},
 73 |    "outputs": [],
 74 |    "source": []
 75 |   }
 76 |  ],
 77 |  "metadata": {
 78 |   "kernelspec": {
 79 |    "display_name": "Python 3 (ipykernel)",
 80 |    "language": "python",
 81 |    "name": "python3"
 82 |   },
 83 |   "language_info": {
 84 |    "codemirror_mode": {
 85 |     "name": "ipython",
 86 |     "version": 3
 87 |    },
 88 |    "file_extension": ".py",
 89 |    "mimetype": "text/x-python",
 90 |    "name": "python",
 91 |    "nbconvert_exporter": "python",
 92 |    "pygments_lexer": "ipython3",
 93 |    "version": "3.8.10"
 94 |   },
 95 |   "toc": {
 96 |    "base_numbering": 1,
 97 |    "nav_menu": {},
 98 |    "number_sections": true,
 99 |    "sideBar": true,
100 |    "skip_h1_title": false,
101 |    "title_cell": "Table of Contents",
102 |    "title_sidebar": "Contents",
103 |    "toc_cell": false,
104 |    "toc_position": {},
105 |    "toc_section_display": true,
106 |    "toc_window_display": false
107 |   }
108 |  },
109 |  "nbformat": 4,
110 |  "nbformat_minor": 5
111 | }
112 | 


--------------------------------------------------------------------------------
/code_snippets/cool_tools/interrogate_example.py:
--------------------------------------------------------------------------------
 1 | class Math:  # No class docstring; the recorded interrogate report below marks the L1 class as MISSED.
 2 |     def __init__(self, num) -> None:  # Stores the operand that plus_two/multiply_three read.
 3 |         self.num = num
 4 | 
 5 |     def plus_two(self):  # Has a docstring; the recorded report below marks this L5 method as COVERED.
 6 |         """Add 2"""
 7 |         return self.num + 2
 8 | 
 9 |     def multiply_three(self):  # No docstring; the recorded report below marks this L9 method as MISSED.
10 |         return self.num * 3  # Returns the stored operand multiplied by 3.
11 | 
12 | 
13 | """
14 | $ interrogate -vv -i interrogate_example.py
15 | ======================================= Coverage for /Users/khuyen/Python-data-science-code-snippet/code_snippets/cool_tools/ ========================================
16 | ------------------------------------------------------------------------- Detailed Coverage --------------------------------------------------------------------------
17 | | Name                                                                                            |                                                           Status |
18 | |-------------------------------------------------------------------------------------------------|------------------------------------------------------------------|
19 | | interrogate_example.py (module)                                                                 |                                                           MISSED |
 20 | |   Math (L1)                                                                                     |                                                           MISSED |
 21 | |     Math.plus_two (L5)                                                                          |                                                          COVERED |
 22 | |     Math.multiply_three (L9)                                                                    |                                                           MISSED |
23 | |-------------------------------------------------------------------------------------------------|------------------------------------------------------------------|
24 | 
25 | ------------------------------------------------------------------------------ Summary -------------------------------------------------------------------------------
26 | | Name                                           |                      Total |                      Miss |                      Cover |                      Cover% |
27 | |------------------------------------------------|----------------------------|---------------------------|----------------------------|-----------------------------|
28 | | interrogate_example.py                         |                          4 |                         3 |                          1 |                         25% |
29 | |------------------------------------------------|----------------------------|---------------------------|----------------------------|-----------------------------|
30 | | TOTAL                                          |                          4 |                         3 |                          1 |                       25.0% |
31 | ----------------------------------------------------------- RESULT: FAILED (minimum: 80.0%, actual: 25.0%) -----------------------------------------------------------
32 | """
33 | 


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/mito_example/iris.csv:
--------------------------------------------------------------------------------
  1 | ,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
  2 | 0,5.1,3.5,1.4,0.2
  3 | 1,4.9,3.0,1.4,0.2
  4 | 2,4.7,3.2,1.3,0.2
  5 | 3,4.6,3.1,1.5,0.2
  6 | 4,5.0,3.6,1.4,0.2
  7 | 5,5.4,3.9,1.7,0.4
  8 | 6,4.6,3.4,1.4,0.3
  9 | 7,5.0,3.4,1.5,0.2
 10 | 8,4.4,2.9,1.4,0.2
 11 | 9,4.9,3.1,1.5,0.1
 12 | 10,5.4,3.7,1.5,0.2
 13 | 11,4.8,3.4,1.6,0.2
 14 | 12,4.8,3.0,1.4,0.1
 15 | 13,4.3,3.0,1.1,0.1
 16 | 14,5.8,4.0,1.2,0.2
 17 | 15,5.7,4.4,1.5,0.4
 18 | 16,5.4,3.9,1.3,0.4
 19 | 17,5.1,3.5,1.4,0.3
 20 | 18,5.7,3.8,1.7,0.3
 21 | 19,5.1,3.8,1.5,0.3
 22 | 20,5.4,3.4,1.7,0.2
 23 | 21,5.1,3.7,1.5,0.4
 24 | 22,4.6,3.6,1.0,0.2
 25 | 23,5.1,3.3,1.7,0.5
 26 | 24,4.8,3.4,1.9,0.2
 27 | 25,5.0,3.0,1.6,0.2
 28 | 26,5.0,3.4,1.6,0.4
 29 | 27,5.2,3.5,1.5,0.2
 30 | 28,5.2,3.4,1.4,0.2
 31 | 29,4.7,3.2,1.6,0.2
 32 | 30,4.8,3.1,1.6,0.2
 33 | 31,5.4,3.4,1.5,0.4
 34 | 32,5.2,4.1,1.5,0.1
 35 | 33,5.5,4.2,1.4,0.2
 36 | 34,4.9,3.1,1.5,0.2
 37 | 35,5.0,3.2,1.2,0.2
 38 | 36,5.5,3.5,1.3,0.2
 39 | 37,4.9,3.6,1.4,0.1
 40 | 38,4.4,3.0,1.3,0.2
 41 | 39,5.1,3.4,1.5,0.2
 42 | 40,5.0,3.5,1.3,0.3
 43 | 41,4.5,2.3,1.3,0.3
 44 | 42,4.4,3.2,1.3,0.2
 45 | 43,5.0,3.5,1.6,0.6
 46 | 44,5.1,3.8,1.9,0.4
 47 | 45,4.8,3.0,1.4,0.3
 48 | 46,5.1,3.8,1.6,0.2
 49 | 47,4.6,3.2,1.4,0.2
 50 | 48,5.3,3.7,1.5,0.2
 51 | 49,5.0,3.3,1.4,0.2
 52 | 50,7.0,3.2,4.7,1.4
 53 | 51,6.4,3.2,4.5,1.5
 54 | 52,6.9,3.1,4.9,1.5
 55 | 53,5.5,2.3,4.0,1.3
 56 | 54,6.5,2.8,4.6,1.5
 57 | 55,5.7,2.8,4.5,1.3
 58 | 56,6.3,3.3,4.7,1.6
 59 | 57,4.9,2.4,3.3,1.0
 60 | 58,6.6,2.9,4.6,1.3
 61 | 59,5.2,2.7,3.9,1.4
 62 | 60,5.0,2.0,3.5,1.0
 63 | 61,5.9,3.0,4.2,1.5
 64 | 62,6.0,2.2,4.0,1.0
 65 | 63,6.1,2.9,4.7,1.4
 66 | 64,5.6,2.9,3.6,1.3
 67 | 65,6.7,3.1,4.4,1.4
 68 | 66,5.6,3.0,4.5,1.5
 69 | 67,5.8,2.7,4.1,1.0
 70 | 68,6.2,2.2,4.5,1.5
 71 | 69,5.6,2.5,3.9,1.1
 72 | 70,5.9,3.2,4.8,1.8
 73 | 71,6.1,2.8,4.0,1.3
 74 | 72,6.3,2.5,4.9,1.5
 75 | 73,6.1,2.8,4.7,1.2
 76 | 74,6.4,2.9,4.3,1.3
 77 | 75,6.6,3.0,4.4,1.4
 78 | 76,6.8,2.8,4.8,1.4
 79 | 77,6.7,3.0,5.0,1.7
 80 | 78,6.0,2.9,4.5,1.5
 81 | 79,5.7,2.6,3.5,1.0
 82 | 80,5.5,2.4,3.8,1.1
 83 | 81,5.5,2.4,3.7,1.0
 84 | 82,5.8,2.7,3.9,1.2
 85 | 83,6.0,2.7,5.1,1.6
 86 | 84,5.4,3.0,4.5,1.5
 87 | 85,6.0,3.4,4.5,1.6
 88 | 86,6.7,3.1,4.7,1.5
 89 | 87,6.3,2.3,4.4,1.3
 90 | 88,5.6,3.0,4.1,1.3
 91 | 89,5.5,2.5,4.0,1.3
 92 | 90,5.5,2.6,4.4,1.2
 93 | 91,6.1,3.0,4.6,1.4
 94 | 92,5.8,2.6,4.0,1.2
 95 | 93,5.0,2.3,3.3,1.0
 96 | 94,5.6,2.7,4.2,1.3
 97 | 95,5.7,3.0,4.2,1.2
 98 | 96,5.7,2.9,4.2,1.3
 99 | 97,6.2,2.9,4.3,1.3
100 | 98,5.1,2.5,3.0,1.1
101 | 99,5.7,2.8,4.1,1.3
102 | 100,6.3,3.3,6.0,2.5
103 | 101,5.8,2.7,5.1,1.9
104 | 102,7.1,3.0,5.9,2.1
105 | 103,6.3,2.9,5.6,1.8
106 | 104,6.5,3.0,5.8,2.2
107 | 105,7.6,3.0,6.6,2.1
108 | 106,4.9,2.5,4.5,1.7
109 | 107,7.3,2.9,6.3,1.8
110 | 108,6.7,2.5,5.8,1.8
111 | 109,7.2,3.6,6.1,2.5
112 | 110,6.5,3.2,5.1,2.0
113 | 111,6.4,2.7,5.3,1.9
114 | 112,6.8,3.0,5.5,2.1
115 | 113,5.7,2.5,5.0,2.0
116 | 114,5.8,2.8,5.1,2.4
117 | 115,6.4,3.2,5.3,2.3
118 | 116,6.5,3.0,5.5,1.8
119 | 117,7.7,3.8,6.7,2.2
120 | 118,7.7,2.6,6.9,2.3
121 | 119,6.0,2.2,5.0,1.5
122 | 120,6.9,3.2,5.7,2.3
123 | 121,5.6,2.8,4.9,2.0
124 | 122,7.7,2.8,6.7,2.0
125 | 123,6.3,2.7,4.9,1.8
126 | 124,6.7,3.3,5.7,2.1
127 | 125,7.2,3.2,6.0,1.8
128 | 126,6.2,2.8,4.8,1.8
129 | 127,6.1,3.0,4.9,1.8
130 | 128,6.4,2.8,5.6,2.1
131 | 129,7.2,3.0,5.8,1.6
132 | 130,7.4,2.8,6.1,1.9
133 | 131,7.9,3.8,6.4,2.0
134 | 132,6.4,2.8,5.6,2.2
135 | 133,6.3,2.8,5.1,1.5
136 | 134,6.1,2.6,5.6,1.4
137 | 135,7.7,3.0,6.1,2.3
138 | 136,6.3,3.4,5.6,2.4
139 | 137,6.4,3.1,5.5,1.8
140 | 138,6.0,3.0,4.8,1.8
141 | 139,6.9,3.1,5.4,2.1
142 | 140,6.7,3.1,5.6,2.4
143 | 141,6.9,3.1,5.1,2.3
144 | 142,5.8,2.7,5.1,1.9
145 | 143,6.8,3.2,5.9,2.3
146 | 144,6.7,3.3,5.7,2.5
147 | 145,6.7,3.0,5.2,2.3
148 | 146,6.3,2.5,5.0,1.9
149 | 147,6.5,3.0,5.2,2.0
150 | 148,6.2,3.4,5.4,2.3
151 | 149,5.9,3.0,5.1,1.8
152 | 


--------------------------------------------------------------------------------
/code_snippets/notebook/watermark_example.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "id": "coated-cyprus",
  7 |    "metadata": {},
  8 |    "outputs": [],
  9 |    "source": [
 10 |     "pip install watermark "
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": null,
 16 |    "id": "emerging-party",
 17 |    "metadata": {},
 18 |    "outputs": [],
 19 |    "source": [
 20 |     "%load_ext watermark"
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "code",
 25 |    "execution_count": 12,
 26 |    "id": "enabling-applicant",
 27 |    "metadata": {
 28 |     "ExecuteTime": {
 29 |      "end_time": "2021-07-07T13:14:52.253902Z",
 30 |      "start_time": "2021-07-07T13:14:52.222631Z"
 31 |     }
 32 |    },
 33 |    "outputs": [
 34 |     {
 35 |      "name": "stdout",
 36 |      "output_type": "stream",
 37 |      "text": [
 38 |       "Last updated: 2021-07-07T08:14:52.226814-05:00\n",
 39 |       "\n",
 40 |       "Python implementation: CPython\n",
 41 |       "Python version       : 3.8.1\n",
 42 |       "IPython version      : 7.24.0\n",
 43 |       "\n",
 44 |       "Compiler    : GCC 7.5.0\n",
 45 |       "OS          : Linux\n",
 46 |       "Release     : 5.4.0-77-generic\n",
 47 |       "Machine     : x86_64\n",
 48 |       "Processor   : x86_64\n",
 49 |       "CPU cores   : 16\n",
 50 |       "Architecture: 64bit\n",
 51 |       "\n"
 52 |      ]
 53 |     }
 54 |    ],
 55 |    "source": [
 56 |     "%watermark"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "code",
 61 |    "execution_count": 9,
 62 |    "id": "anonymous-piano",
 63 |    "metadata": {
 64 |     "ExecuteTime": {
 65 |      "end_time": "2021-07-07T13:09:37.538488Z",
 66 |      "start_time": "2021-07-07T13:09:37.086018Z"
 67 |     }
 68 |    },
 69 |    "outputs": [],
 70 |    "source": [
 71 |     "import numpy as np \n",
 72 |     "import pandas as pd \n",
 73 |     "import sklearn"
 74 |    ]
 75 |   },
 76 |   {
 77 |    "cell_type": "code",
 78 |    "execution_count": 10,
 79 |    "id": "concrete-italy",
 80 |    "metadata": {
 81 |     "ExecuteTime": {
 82 |      "end_time": "2021-07-07T13:09:37.571037Z",
 83 |      "start_time": "2021-07-07T13:09:37.539577Z"
 84 |     }
 85 |    },
 86 |    "outputs": [
 87 |     {
 88 |      "name": "stdout",
 89 |      "output_type": "stream",
 90 |      "text": [
 91 |       "json    : 2.0.9\n",
 92 |       "pandas  : 1.2.3\n",
 93 |       "autopep8: 1.5.7\n",
 94 |       "sklearn : 0.0\n",
 95 |       "numpy   : 1.21.0\n",
 96 |       "isort   : 5.8.0\n",
 97 |       "\n"
 98 |      ]
 99 |     }
100 |    ],
101 |    "source": [
102 |     "%watermark --iversions # Show the versions of libraries being used"
103 |    ]
104 |   },
105 |   {
106 |    "cell_type": "code",
107 |    "execution_count": null,
108 |    "id": "treated-cleanup",
109 |    "metadata": {},
110 |    "outputs": [],
111 |    "source": []
112 |   }
113 |  ],
114 |  "metadata": {
115 |   "kernelspec": {
116 |    "display_name": "Python 3",
117 |    "language": "python",
118 |    "name": "python3"
119 |   },
120 |   "language_info": {
121 |    "codemirror_mode": {
122 |     "name": "ipython",
123 |     "version": 3
124 |    },
125 |    "file_extension": ".py",
126 |    "mimetype": "text/x-python",
127 |    "name": "python",
128 |    "nbconvert_exporter": "python",
129 |    "pygments_lexer": "ipython3",
130 |    "version": "3.8.1"
131 |   },
132 |   "toc": {
133 |    "base_numbering": 1,
134 |    "nav_menu": {},
135 |    "number_sections": true,
136 |    "sideBar": true,
137 |    "skip_h1_title": false,
138 |    "title_cell": "Table of Contents",
139 |    "title_sidebar": "Contents",
140 |    "toc_cell": false,
141 |    "toc_position": {},
142 |    "toc_section_display": true,
143 |    "toc_window_display": false
144 |   }
145 |  },
146 |  "nbformat": 4,
147 |  "nbformat_minor": 5
148 | }
149 | 


--------------------------------------------------------------------------------
/code_snippets/pandas/highlight_pandas.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 3,
  6 |    "id": "therapeutic-binary",
  7 |    "metadata": {
  8 |     "ExecuteTime": {
  9 |      "end_time": "2021-06-20T16:52:56.992148Z",
 10 |      "start_time": "2021-06-20T16:52:56.734486Z"
 11 |     }
 12 |    },
 13 |    "outputs": [],
 14 |    "source": [
 15 |     "import pandas as pd "
 16 |    ]
 17 |   },
 18 |   {
 19 |    "cell_type": "code",
 20 |    "execution_count": 7,
 21 |    "id": "modern-reducing",
 22 |    "metadata": {
 23 |     "ExecuteTime": {
 24 |      "end_time": "2021-06-20T16:54:05.689246Z",
 25 |      "start_time": "2021-06-20T16:54:05.676710Z"
 26 |     }
 27 |    },
 28 |    "outputs": [],
 29 |    "source": [
 30 |     "df = pd.DataFrame({'col1': [-5, -2, 1, 4], 'col2': [2, 3, -1, 4]})"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "code",
 35 |    "execution_count": 8,
 36 |    "id": "mighty-edwards",
 37 |    "metadata": {
 38 |     "ExecuteTime": {
 39 |      "end_time": "2021-06-20T16:54:06.012566Z",
 40 |      "start_time": "2021-06-20T16:54:06.005511Z"
 41 |     }
 42 |    },
 43 |    "outputs": [],
 44 |    "source": [
 45 |     "def highlight_number(row):\n",
 46 |     "    return [\n",
 47 |     "        'background-color: red; color: white' if cell <= 0\n",
 48 |     "        else 'background-color: green; color: white'\n",
 49 |     "        for cell in row\n",
 50 |     "    ]"
 51 |    ]
 52 |   },
 53 |   {
 54 |    "cell_type": "code",
 55 |    "execution_count": 9,
 56 |    "id": "authentic-crowd",
 57 |    "metadata": {
 58 |     "ExecuteTime": {
 59 |      "end_time": "2021-06-20T16:54:06.443918Z",
 60 |      "start_time": "2021-06-20T16:54:06.419712Z"
 61 |     }
 62 |    },
 63 |    "outputs": [
 64 |     {
 65 |      "data": {
 66 |       "text/html": [
 67 |        "<style  type=\"text/css\" >\n",
 68 |        "#T_dd7e2_row0_col0,#T_dd7e2_row1_col0,#T_dd7e2_row2_col1{\n",
 69 |        "            background-color:  red;\n",
 70 |        "             color:  white;\n",
 71 |        "        }#T_dd7e2_row0_col1,#T_dd7e2_row1_col1,#T_dd7e2_row2_col0,#T_dd7e2_row3_col0,#T_dd7e2_row3_col1{\n",
 72 |        "            background-color:  green;\n",
 73 |        "             color:  white;\n",
 74 |        "        }</style><table id=\"T_dd7e2_\" ><thead>    <tr>        <th class=\"blank level0\" ></th>        <th class=\"col_heading level0 col0\" >col1</th>        <th class=\"col_heading level0 col1\" >col2</th>    </tr></thead><tbody>\n",
 75 |        "                <tr>\n",
 76 |        "                        <th id=\"T_dd7e2_level0_row0\" class=\"row_heading level0 row0\" >0</th>\n",
 77 |        "                        <td id=\"T_dd7e2_row0_col0\" class=\"data row0 col0\" >-5</td>\n",
 78 |        "                        <td id=\"T_dd7e2_row0_col1\" class=\"data row0 col1\" >2</td>\n",
 79 |        "            </tr>\n",
 80 |        "            <tr>\n",
 81 |        "                        <th id=\"T_dd7e2_level0_row1\" class=\"row_heading level0 row1\" >1</th>\n",
 82 |        "                        <td id=\"T_dd7e2_row1_col0\" class=\"data row1 col0\" >-2</td>\n",
 83 |        "                        <td id=\"T_dd7e2_row1_col1\" class=\"data row1 col1\" >3</td>\n",
 84 |        "            </tr>\n",
 85 |        "            <tr>\n",
 86 |        "                        <th id=\"T_dd7e2_level0_row2\" class=\"row_heading level0 row2\" >2</th>\n",
 87 |        "                        <td id=\"T_dd7e2_row2_col0\" class=\"data row2 col0\" >1</td>\n",
 88 |        "                        <td id=\"T_dd7e2_row2_col1\" class=\"data row2 col1\" >-1</td>\n",
 89 |        "            </tr>\n",
 90 |        "            <tr>\n",
 91 |        "                        <th id=\"T_dd7e2_level0_row3\" class=\"row_heading level0 row3\" >3</th>\n",
 92 |        "                        <td id=\"T_dd7e2_row3_col0\" class=\"data row3 col0\" >4</td>\n",
 93 |        "                        <td id=\"T_dd7e2_row3_col1\" class=\"data row3 col1\" >4</td>\n",
 94 |        "            </tr>\n",
 95 |        "    </tbody></table>"
 96 |       ],
 97 |       "text/plain": [
 98 |        "<pandas.io.formats.style.Styler at 0x7fb03c6bb9d0>"
 99 |       ]
100 |      },
101 |      "execution_count": 9,
102 |      "metadata": {},
103 |      "output_type": "execute_result"
104 |     }
105 |    ],
106 |    "source": [
107 |     "df.style.apply(highlight_number)"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "code",
112 |    "execution_count": null,
113 |    "id": "durable-speed",
114 |    "metadata": {},
115 |    "outputs": [],
116 |    "source": []
117 |   }
118 |  ],
119 |  "metadata": {
120 |   "kernelspec": {
121 |    "display_name": "Python 3",
122 |    "language": "python",
123 |    "name": "python3"
124 |   },
125 |   "language_info": {
126 |    "codemirror_mode": {
127 |     "name": "ipython",
128 |     "version": 3
129 |    },
130 |    "file_extension": ".py",
131 |    "mimetype": "text/x-python",
132 |    "name": "python",
133 |    "nbconvert_exporter": "python",
134 |    "pygments_lexer": "ipython3",
135 |    "version": "3.8.1"
136 |   },
137 |   "toc": {
138 |    "base_numbering": 1,
139 |    "nav_menu": {},
140 |    "number_sections": true,
141 |    "sideBar": true,
142 |    "skip_h1_title": false,
143 |    "title_cell": "Table of Contents",
144 |    "title_sidebar": "Contents",
145 |    "toc_cell": false,
146 |    "toc_position": {},
147 |    "toc_section_display": true,
148 |    "toc_window_display": false
149 |   }
150 |  },
151 |  "nbformat": 4,
152 |  "nbformat_minor": 5
153 | }
154 | 


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/newspaper3k.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "id": "unnecessary-gilbert",
  7 |    "metadata": {
  8 |     "ExecuteTime": {
  9 |      "end_time": "2021-07-10T13:22:43.082803Z",
 10 |      "start_time": "2021-07-10T13:22:28.715292Z"
 11 |     }
 12 |    },
 13 |    "outputs": [],
 14 |    "source": [
 15 |     "pip install newspaper3k"
 16 |    ]
 17 |   },
 18 |   {
 19 |    "cell_type": "code",
 20 |    "execution_count": 3,
 21 |    "id": "bulgarian-austria",
 22 |    "metadata": {
 23 |     "ExecuteTime": {
 24 |      "end_time": "2021-07-10T13:23:21.852155Z",
 25 |      "start_time": "2021-07-10T13:23:18.996409Z"
 26 |     }
 27 |    },
 28 |    "outputs": [
 29 |     {
 30 |      "name": "stderr",
 31 |      "output_type": "stream",
 32 |      "text": [
 33 |       "[nltk_data] Downloading package punkt to /home/user/nltk_data...\n",
 34 |       "[nltk_data]   Unzipping tokenizers/punkt.zip.\n"
 35 |      ]
 36 |     },
 37 |     {
 38 |      "data": {
 39 |       "text/plain": [
 40 |        "True"
 41 |       ]
 42 |      },
 43 |      "execution_count": 3,
 44 |      "metadata": {},
 45 |      "output_type": "execute_result"
 46 |     }
 47 |    ],
 48 |    "source": [
 49 |     "from newspaper import Article\n",
 50 |     "import nltk\n",
 51 |     "nltk.download('punkt')"
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "code",
 56 |    "execution_count": 4,
 57 |    "id": "municipal-strain",
 58 |    "metadata": {
 59 |     "ExecuteTime": {
 60 |      "end_time": "2021-07-10T13:24:11.568005Z",
 61 |      "start_time": "2021-07-10T13:24:11.255995Z"
 62 |     }
 63 |    },
 64 |    "outputs": [],
 65 |    "source": [
 66 |     "url = \"https://www.dataquest.io/blog/learn-data-science/\"\n",
 67 |     "article = Article(url)\n",
 68 |     "article.download()\n",
 69 |     "article.parse()"
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "code",
 74 |    "execution_count": 6,
 75 |    "id": "roman-postage",
 76 |    "metadata": {
 77 |     "ExecuteTime": {
 78 |      "end_time": "2021-07-10T13:24:21.243417Z",
 79 |      "start_time": "2021-07-10T13:24:21.240287Z"
 80 |     }
 81 |    },
 82 |    "outputs": [
 83 |     {
 84 |      "data": {
 85 |       "text/plain": [
 86 |        "'How to Learn Data Science (A step-by-step guide)'"
 87 |       ]
 88 |      },
 89 |      "execution_count": 6,
 90 |      "metadata": {},
 91 |      "output_type": "execute_result"
 92 |     }
 93 |    ],
 94 |    "source": [
 95 |     "article.title"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "code",
100 |    "execution_count": 7,
101 |    "id": "brave-sussex",
102 |    "metadata": {
103 |     "ExecuteTime": {
104 |      "end_time": "2021-07-10T13:24:27.866249Z",
105 |      "start_time": "2021-07-10T13:24:27.856273Z"
106 |     }
107 |    },
108 |    "outputs": [
109 |     {
110 |      "data": {
111 |       "text/plain": [
112 |        "datetime.datetime(2020, 5, 4, 7, 1, tzinfo=tzutc())"
113 |       ]
114 |      },
115 |      "execution_count": 7,
116 |      "metadata": {},
117 |      "output_type": "execute_result"
118 |     }
119 |    ],
120 |    "source": [
121 |     "article.publish_date"
122 |    ]
123 |   },
124 |   {
125 |    "cell_type": "code",
126 |    "execution_count": 8,
127 |    "id": "behind-familiar",
128 |    "metadata": {
129 |     "ExecuteTime": {
130 |      "end_time": "2021-07-10T13:24:34.598082Z",
131 |      "start_time": "2021-07-10T13:24:34.594198Z"
132 |     }
133 |    },
134 |    "outputs": [
135 |     {
136 |      "data": {
137 |       "text/plain": [
138 |        "'https://www.dataquest.io/wp-content/uploads/2020/05/learn-data-science.jpg'"
139 |       ]
140 |      },
141 |      "execution_count": 8,
142 |      "metadata": {},
143 |      "output_type": "execute_result"
144 |     }
145 |    ],
146 |    "source": [
147 |     "article.top_image"
148 |    ]
149 |   },
150 |   {
151 |    "cell_type": "code",
152 |    "execution_count": 9,
153 |    "id": "formed-declaration",
154 |    "metadata": {
155 |     "ExecuteTime": {
156 |      "end_time": "2021-07-10T13:24:41.152427Z",
157 |      "start_time": "2021-07-10T13:24:41.112948Z"
158 |     }
159 |    },
160 |    "outputs": [],
161 |    "source": [
162 |     "article.nlp()"
163 |    ]
164 |   },
165 |   {
166 |    "cell_type": "code",
167 |    "execution_count": 12,
168 |    "id": "olympic-conditioning",
169 |    "metadata": {
170 |     "ExecuteTime": {
171 |      "end_time": "2021-07-10T13:33:00.606867Z",
172 |      "start_time": "2021-07-10T13:33:00.602999Z"
173 |     }
174 |    },
175 |    "outputs": [
176 |     {
177 |      "data": {
178 |       "text/plain": [
179 |        "'How to Learn Data Science (A step-by-step guide)There’s no doubt about it: data scientists are in high demand.\\nHow to Learn Data ScienceSo how do you start to learn data science?\\nIf you want to learn data science or just pick up some data science skills, your first goal should be to learn to love data.\\nRather, consider it as a rough set of guidelines to follow as you learn data science on your own path.\\nI personally believe that anyone can learn data science if they approach it with the right frame of mind.'"
180 |       ]
181 |      },
182 |      "execution_count": 12,
183 |      "metadata": {},
184 |      "output_type": "execute_result"
185 |     }
186 |    ],
187 |    "source": [
188 |     "article.summary"
189 |    ]
190 |   },
191 |   {
192 |    "cell_type": "code",
193 |    "execution_count": 10,
194 |    "id": "gentle-princeton",
195 |    "metadata": {
196 |     "ExecuteTime": {
197 |      "end_time": "2021-07-10T13:24:46.465668Z",
198 |      "start_time": "2021-07-10T13:24:46.462253Z"
199 |     }
200 |    },
201 |    "outputs": [
202 |     {
203 |      "data": {
204 |       "text/plain": [
205 |        "['learn',\n",
206 |        " 'skills',\n",
207 |        " 'work',\n",
208 |        " 'stepbystep',\n",
209 |        " 'youll',\n",
210 |        " 'guide',\n",
211 |        " 'learning',\n",
212 |        " 'need',\n",
213 |        " 'science',\n",
214 |        " 'data',\n",
215 |        " 'youre',\n",
216 |        " 'scientists']"
217 |       ]
218 |      },
219 |      "execution_count": 10,
220 |      "metadata": {},
221 |      "output_type": "execute_result"
222 |     }
223 |    ],
224 |    "source": [
225 |     "article.keywords"
226 |    ]
227 |   },
228 |   {
229 |    "cell_type": "code",
230 |    "execution_count": null,
231 |    "id": "accompanied-strengthening",
232 |    "metadata": {},
233 |    "outputs": [],
234 |    "source": []
235 |   }
236 |  ],
237 |  "metadata": {
238 |   "kernelspec": {
239 |    "display_name": "Python 3",
240 |    "language": "python",
241 |    "name": "python3"
242 |   },
243 |   "language_info": {
244 |    "codemirror_mode": {
245 |     "name": "ipython",
246 |     "version": 3
247 |    },
248 |    "file_extension": ".py",
249 |    "mimetype": "text/x-python",
250 |    "name": "python",
251 |    "nbconvert_exporter": "python",
252 |    "pygments_lexer": "ipython3",
253 |    "version": "3.8.1"
254 |   },
255 |   "toc": {
256 |    "base_numbering": 1,
257 |    "nav_menu": {},
258 |    "number_sections": true,
259 |    "sideBar": true,
260 |    "skip_h1_title": false,
261 |    "title_cell": "Table of Contents",
262 |    "title_sidebar": "Contents",
263 |    "toc_cell": false,
264 |    "toc_position": {},
265 |    "toc_section_display": true,
266 |    "toc_window_display": false
267 |   }
268 |  },
269 |  "nbformat": 4,
270 |  "nbformat_minor": 5
271 | }
272 | 


--------------------------------------------------------------------------------
/code_snippets/data_science_tools/folium_example.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "code",
 5 |    "execution_count": null,
 6 |    "id": "juvenile-consortium",
 7 |    "metadata": {
 8 |     "ExecuteTime": {
 9 |      "end_time": "2021-06-20T15:40:03.652006Z",
10 |      "start_time": "2021-06-20T15:40:00.949060Z"
11 |     }
12 |    },
13 |    "outputs": [],
14 |    "source": [
15 |     "pip install folium "
16 |    ]
17 |   },
18 |   {
19 |    "cell_type": "code",
20 |    "execution_count": 3,
21 |    "id": "assigned-conservative",
22 |    "metadata": {
23 |     "ExecuteTime": {
24 |      "end_time": "2021-06-20T15:41:42.638978Z",
25 |      "start_time": "2021-06-20T15:41:42.627499Z"
26 |     }
27 |    },
28 |    "outputs": [
29 |     {
30 |      "data": {
31 |       "text/html": [
32 |        "<div style=\"width:100%;\"><div style=\"position:relative;width:100%;height:0;padding-bottom:60%;\"><span style=\"color:#565656\">Make this Notebook Trusted to load map: File -> Trust Notebook</span><iframe src=\"about:blank\" style=\"position:absolute;width:100%;height:100%;left:0;top:0;border:none !important;\" data-html=%3C%21DOCTYPE%20html%3E%0A%3Chead%3E%20%20%20%20%0A%20%20%20%20%3Cmeta%20http-equiv%3D%22content-type%22%20content%3D%22text/html%3B%20charset%3DUTF-8%22%20/%3E%0A%20%20%20%20%0A%20%20%20%20%20%20%20%20%3Cscript%3E%0A%20%20%20%20%20%20%20%20%20%20%20%20L_NO_TOUCH%20%3D%20false%3B%0A%20%20%20%20%20%20%20%20%20%20%20%20L_DISABLE_3D%20%3D%20false%3B%0A%20%20%20%20%20%20%20%20%3C/script%3E%0A%20%20%20%20%0A%20%20%20%20%3Cstyle%3Ehtml%2C%20body%20%7Bwidth%3A%20100%25%3Bheight%3A%20100%25%3Bmargin%3A%200%3Bpadding%3A%200%3B%7D%3C/style%3E%0A%20%20%20%20%3Cstyle%3E%23map%20%7Bposition%3Aabsolute%3Btop%3A0%3Bbottom%3A0%3Bright%3A0%3Bleft%3A0%3B%7D%3C/style%3E%0A%20%20%20%20%3Cscript%20src%3D%22https%3A//cdn.jsdelivr.net/npm/leaflet%401.6.0/dist/leaflet.js%22%3E%3C/script%3E%0A%20%20%20%20%3Cscript%20src%3D%22https%3A//code.jquery.com/jquery-1.12.4.min.js%22%3E%3C/script%3E%0A%20%20%20%20%3Cscript%20src%3D%22https%3A//maxcdn.bootstrapcdn.com/bootstrap/3.2.0/js/bootstrap.min.js%22%3E%3C/script%3E%0A%20%20%20%20%3Cscript%20src%3D%22https%3A//cdnjs.cloudflare.com/ajax/libs/Leaflet.awesome-markers/2.0.2/leaflet.awesome-markers.js%22%3E%3C/script%3E%0A%20%20%20%20%3Clink%20rel%3D%22stylesheet%22%20href%3D%22https%3A//cdn.jsdelivr.net/npm/leaflet%401.6.0/dist/leaflet.css%22/%3E%0A%20%20%20%20%3Clink%20rel%3D%22stylesheet%22%20href%3D%22https%3A//maxcdn.bootstrapcdn.com/bootstrap/3.2.0/css/bootstrap.min.css%22/%3E%0A%20%20%20%20%3Clink%20rel%3D%22stylesheet%22%20href%3D%22https%3A//maxcdn.bootstrapcdn.com/bootstrap/3.2.0/css/bootstrap-theme.min.css%22/%3E%0A%20%20%20%20%3Clink%20rel%3D%22stylesheet%22%20href%3D%22https%3A//maxcdn.bootstrapcdn.com/fon
t-awesome/4.6.3/css/font-awesome.min.css%22/%3E%0A%20%20%20%20%3Clink%20rel%3D%22stylesheet%22%20href%3D%22https%3A//cdnjs.cloudflare.com/ajax/libs/Leaflet.awesome-markers/2.0.2/leaflet.awesome-markers.css%22/%3E%0A%20%20%20%20%3Clink%20rel%3D%22stylesheet%22%20href%3D%22https%3A//cdn.jsdelivr.net/gh/python-visualization/folium/folium/templates/leaflet.awesome.rotate.min.css%22/%3E%0A%20%20%20%20%0A%20%20%20%20%20%20%20%20%20%20%20%20%3Cmeta%20name%3D%22viewport%22%20content%3D%22width%3Ddevice-width%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20initial-scale%3D1.0%2C%20maximum-scale%3D1.0%2C%20user-scalable%3Dno%22%20/%3E%0A%20%20%20%20%20%20%20%20%20%20%20%20%3Cstyle%3E%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%23map_176b31c3edc74994929407bbb683ea5c%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20position%3A%20relative%3B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20width%3A%20100.0%25%3B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20height%3A%20100.0%25%3B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20left%3A%200.0%25%3B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20top%3A%200.0%25%3B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%20%20%20%20%3C/style%3E%0A%20%20%20%20%20%20%20%20%0A%3C/head%3E%0A%3Cbody%3E%20%20%20%20%0A%20%20%20%20%0A%20%20%20%20%20%20%20%20%20%20%20%20%3Cdiv%20class%3D%22folium-map%22%20id%3D%22map_176b31c3edc74994929407bbb683ea5c%22%20%3E%3C/div%3E%0A%20%20%20%20%20%20%20%20%0A%3C/body%3E%0A%3Cscript%3E%20%20%20%20%0A%20%20%20%20%0A%20%20%20%20%20%20%20%20%20%20%20%20var%20map_176b31c3edc74994929407bbb683ea5c%20%3D%20L.map%28%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%22map_176b31c3edc74994929407bbb683ea5c%22%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20center%3A%20%5B45.5236%2C%20-122.675%5D%2C%0A%20%20%20%20%20%20%20%20%20%20%2
0%20%20%20%20%20%20%20%20%20crs%3A%20L.CRS.EPSG3857%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20zoom%3A%2010%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20zoomControl%3A%20true%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20preferCanvas%3A%20false%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%20%20%20%20%29%3B%0A%0A%20%20%20%20%20%20%20%20%20%20%20%20%0A%0A%20%20%20%20%20%20%20%20%0A%20%20%20%20%0A%20%20%20%20%20%20%20%20%20%20%20%20var%20tile_layer_5f994105e8a74232936b87fb635d5dd1%20%3D%20L.tileLayer%28%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%22https%3A//%7Bs%7D.tile.openstreetmap.org/%7Bz%7D/%7Bx%7D/%7By%7D.png%22%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%7B%22attribution%22%3A%20%22Data%20by%20%5Cu0026copy%3B%20%5Cu003ca%20href%3D%5C%22http%3A//openstreetmap.org%5C%22%5Cu003eOpenStreetMap%5Cu003c/a%5Cu003e%2C%20under%20%5Cu003ca%20href%3D%5C%22http%3A//www.openstreetmap.org/copyright%5C%22%5Cu003eODbL%5Cu003c/a%5Cu003e.%22%2C%20%22detectRetina%22%3A%20false%2C%20%22maxNativeZoom%22%3A%2018%2C%20%22maxZoom%22%3A%2018%2C%20%22minZoom%22%3A%200%2C%20%22noWrap%22%3A%20false%2C%20%22opacity%22%3A%201%2C%20%22subdomains%22%3A%20%22abc%22%2C%20%22tms%22%3A%20false%7D%0A%20%20%20%20%20%20%20%20%20%20%20%20%29.addTo%28map_176b31c3edc74994929407bbb683ea5c%29%3B%0A%20%20%20%20%20%20%20%20%0A%20%20%20%20%0A%20%20%20%20%20%20%20%20%20%20%20%20var%20marker_eca8720e4a6b4e9883d2708a23c2a4c3%20%3D%20L.marker%28%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%5B45.3288%2C%20-121.6625%5D%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%7B%7D%0A%20%20%20%20%20%20%20%20%20%20%20%20%29.addTo%28map_176b31c3edc74994929407bbb683ea5c%29%3B%0A%20%20%20%20%20%20%20%20%0A%20%20%20%20%0A%20%20%20%20%20%20%20%20var%20popup_c7124ca5f5c643b4a798b64f2ca8d683%20%3D%20L.popup%28%7B%22maxWidth%22%3A%20%22100%25%22%7D%29%3B%0A%0A%20%20%20%20%20%20%20%20%0A%20%20%20%20%20%20%2
0%20%20%20%20%20var%20html_d2343319179b495cabc052b4ec70efb1%20%3D%20%24%28%60%3Cdiv%20id%3D%22html_d2343319179b495cabc052b4ec70efb1%22%20style%3D%22width%3A%20100.0%25%3B%20height%3A%20100.0%25%3B%22%3E%3Ci%3EMt.%20Hood%20Meadows%3C/i%3E%3C/div%3E%60%29%5B0%5D%3B%0A%20%20%20%20%20%20%20%20%20%20%20%20popup_c7124ca5f5c643b4a798b64f2ca8d683.setContent%28html_d2343319179b495cabc052b4ec70efb1%29%3B%0A%20%20%20%20%20%20%20%20%0A%0A%20%20%20%20%20%20%20%20marker_eca8720e4a6b4e9883d2708a23c2a4c3.bindPopup%28popup_c7124ca5f5c643b4a798b64f2ca8d683%29%0A%20%20%20%20%20%20%20%20%3B%0A%0A%20%20%20%20%20%20%20%20%0A%20%20%20%20%0A%20%20%20%20%0A%20%20%20%20%20%20%20%20%20%20%20%20marker_eca8720e4a6b4e9883d2708a23c2a4c3.bindTooltip%28%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%60%3Cdiv%3E%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20Click%20me%21%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%3C/div%3E%60%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%7B%22sticky%22%3A%20true%7D%0A%20%20%20%20%20%20%20%20%20%20%20%20%29%3B%0A%20%20%20%20%20%20%20%20%0A%3C/script%3E onload=\"this.contentDocument.open();this.contentDocument.write(    decodeURIComponent(this.getAttribute('data-html')));this.contentDocument.close();\" allowfullscreen webkitallowfullscreen mozallowfullscreen></iframe></div></div>"
33 |       ],
34 |       "text/plain": [
35 |        "<folium.folium.Map at 0x7fd3bf204ac0>"
36 |       ]
37 |      },
38 |      "execution_count": 3,
39 |      "metadata": {},
40 |      "output_type": "execute_result"
41 |     }
42 |    ],
43 |    "source": [
44 |     "import folium\n",
45 |     "m = folium.Map(location=[45.5236, -122.6750])\n",
46 |     "\n",
47 |     "tooltip = 'Click me!'\n",
48 |     "folium.Marker([45.3288, -121.6625], popup='<i>Mt. Hood Meadows</i>',\n",
49 |     "              tooltip=tooltip).add_to(m)\n",
50 |     "m "
51 |    ]
52 |   },
53 |   {
54 |    "cell_type": "code",
55 |    "execution_count": null,
56 |    "id": "occasional-sudan",
57 |    "metadata": {},
58 |    "outputs": [],
59 |    "source": []
60 |   }
61 |  ],
62 |  "metadata": {
63 |   "kernelspec": {
64 |    "display_name": "Python 3",
65 |    "language": "python",
66 |    "name": "python3"
67 |   },
68 |   "language_info": {
69 |    "codemirror_mode": {
70 |     "name": "ipython",
71 |     "version": 3
72 |    },
73 |    "file_extension": ".py",
74 |    "mimetype": "text/x-python",
75 |    "name": "python",
76 |    "nbconvert_exporter": "python",
77 |    "pygments_lexer": "ipython3",
78 |    "version": "3.8.1"
79 |   },
80 |   "toc": {
81 |    "base_numbering": 1,
82 |    "nav_menu": {},
83 |    "number_sections": true,
84 |    "sideBar": true,
85 |    "skip_h1_title": false,
86 |    "title_cell": "Table of Contents",
87 |    "title_sidebar": "Contents",
88 |    "toc_cell": false,
89 |    "toc_position": {},
90 |    "toc_section_display": true,
91 |    "toc_window_display": false
92 |   }
93 |  },
94 |  "nbformat": 4,
95 |  "nbformat_minor": 5
96 | }
97 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Python and Data Science Code Snippets
  2 | Source code of Python and data science snippets posted daily at [Data Science Simplified](https://mathdatasimplified.com/). You can receive these daily tips in your mailbox for free by [subscribing to the website](https://mathdatasimplified.com/).
  3 | 
  4 | To get access to these daily tips on the command line, install [python-snippet](https://github.com/khuyentran1401/python_snippet).
  5 | 
  6 | # Contents
  7 | * [Python Built-in Methods](#python)
  8 | * [Pandas](#pandas)
  9 | * [Numpy](#numpy)
 10 | * [Data Science Tools](#data-science-tools)
 11 | * [Terminal](#terminal)
 12 | * [Cool Tools](#cool-tools)
 13 | * [Jupyter Notebook](#jupyter-notebook)
 14 | 
 15 | 
 16 | <h1 id='python'> Python Built-in Methods <img src="images/python.png"> </h1>
 17 | 
 18 | ### Number
 19 | 
 20 | | Title        | Explanation | Code  |
 21 | | ------------- |:-------------:| :-----:|
 22 | | Get Multiples of a Number Using Modulus | [link](https://mathdatasimplified.com/2021/04/22/get-multiples-of-a-numbers-using-modulus/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/multiples_of_a_number.py)
 23 |  | fractions: Get Numerical Results in Fractions instead of Decimals | [link](https://mathdatasimplified.com/2021/02/27/fractions-get-numerical-results-in-fractions-instead-of-decimals/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/fractions_example.py)
 24 | | How to Use Underscores to Format Large Numbers in Python | [link](https://mathdatasimplified.com/2021/01/12/how-to-use-underscores-to-format-large-numbers-in-python/) |[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/underscore_large_number.py)
 25 |  | Confirm whether a variable is a number | [link](https://mathdatasimplified.com/2020/11/23/confirm-whether-a-variable-is-a-number/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/check_if_number.py)
 26 |  | Get a Division, Floor Division, And The Remainder of a Division in Python | [link](https://mathdatasimplified.com/2021/08/31/get-a-division-floor-division-and-the-remainder-of-a-division-in-python/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/python/division_operators.py)
 27 | 
 28 | ### Boolean
 29 | 
 30 | | Title        | Explanation | Code  |
 31 | | ------------- |:-------------:| :-----:|
 32 | | Boolean Operators: Connect Two Boolean Expressions into One Expression | [link](https://mathdatasimplified.com/2021/05/15/boolean-operators-connect-two-boolean-expressions-into-one-expression/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/boolean_operators.py)
 33 | 
 34 | ### String
 35 | 
 36 | | Title        | Explanation | Code  |
 37 | | ------------- |:-------------:| :-----:|
 38 | | `__str__` and `__repr__`: Create a String Representation of a Python Object | [link](https://mathdatasimplified.com/2021/05/11/__str__-and-__repr__-create-a-string-representation-of-a-python-bbject/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/__str__and__repr.py)
 39 | | String find: Find the Index of a Substring in a Python String | [link](https://mathdatasimplified.com/2021/05/01/string-find-find-the-index-of-a-substring-in-a-python-string/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/string_find.py)
 40 | | eval: Turn a Python String into a Variable or Function | [link](https://mathdatasimplified.com/2021/03/13/eval-turn-a-python-string-into-a-variable-or-function/) |[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/eval_example.py)
 41 | | re.sub: Replace One String with Another String Using Regular Expression | [link](https://mathdatasimplified.com/2021/03/07/re-sub-replace-one-string-with-another-string-using-regular-expression/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/re_sub_example.py)
 42 | 
 43 | ### List 
 44 | 
 45 | | Title        | Explanation | Code  |
 46 | | ------------- |:-------------:| :-----:|
 47 | | any: Check if Any Element of an Iterable is True | [link](https://mathdatasimplified.com/2021/06/01/any-check-if-any-element-of-an-iterable-is-true/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/any_example.py)
 48 | | Extended Iterable Unpacking: Ignore Multiple Values when Unpacking a Python Iterable | [link](https://mathdatasimplified.com/2021/05/03/extended-iterable-unpacking-ignore-multiple-values-when-unpacking-a-python-iterable/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/extended_iterable_unpacking.py)
 49 | | How to Unpack Iterables in Python | [link](https://mathdatasimplified.com/2021/04/07/how-to-unpack-iterables-in-python/) |[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/unpack_iterables.py)
 50 | | random.choice: Get a Randomly Selected Element from a Python List | [link](https://mathdatasimplified.com/2021/02/28/random-choice-get-a-randomly-selected-element-from-a-python-list/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/random_choice.py)
 51 | | random.sample: Get n Random Elements From a List | [link](https://mathdatasimplified.com/2021/09/09/random-sample-get-n-random-elements-from-a-list/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/python/random_sample.py)
 52 | | filter: Get the Elements of an Iterable that a Function Returns True | [link](https://mathdatasimplified.com/2021/06/11/filter-get-the-elements-of-an-iterable-that-a-function-returns-true/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/filter_example.py)
 53 | | heapq: Find n Max Values of a Python List | [link](https://mathdatasimplified.com/2021/03/28/heapq-find-n-max-values-of-a-python-list/) |[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/heapq_example.py)
 54 | | join method: Turn an Iterable into a Python String | [link](https://mathdatasimplified.com/2021/06/17/join-method-turn-an-iterable-to-a-python-string/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/join_list.py)
 55 | | Zip: Associate Elements from Two Iterators based on the Order | [link](https://mathdatasimplified.com/2021/02/05/zip-associate-elements-from-two-iterators-based-on-the-order/) |[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/zip_example.py)
 56 | | collections.Counter: Count the Occurrences of Items in a List |[link](https://mathdatasimplified.com/2021/02/02/collections-counter-count-the-occurrences-of-items-in-a-list/) |[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/collections_counter.py)
 57 | | Zip Function: Create Pairs of Elements from Two Lists in Python | [link](https://mathdatasimplified.com/2021/01/11/zip-function-create-pairs-of-elements-from-two-lists-in-python/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/zip_function.py)
 58 | | Stop using = operator to create a copy of a Python list. Use copy method instead | [link](https://mathdatasimplified.com/2021/01/09/stop-using-operator-to-create-a-copy-of-a-python-list-use-copy-method-instead/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/copy_method.py) 
 59 | | itertools.combinations: A better way to iterate through a pair of values in a Python list | [link](https://mathdatasimplified.com/2020/12/12/itertools-combinations-a-better-way-to-iterate-through-a-pair-of-values-in-a-python-list/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/itertools_combinations_example.py)
 60 | | itertools.product: Nested For-Loops in a Generator Expression | [link](https://mathdatasimplified.com/2021/08/03/itertools-product-nested-for-loops-in-a-generator-expression/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/python/itertools_examples.py)
 61 | | itertools.islice: Get Items From an Iterable That are Within a Certain Range With a Specific Incrementation | [link](https://mathdatasimplified.com/2021/09/07/itertools-islice-get-items-from-an-iterable-that-are-within-a-certain-range-with-a-specific-incrementation/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/python/itertools_islice.py)
 62 | | Enumerate | [link](https://mathdatasimplified.com/2020/11/23/enumerate/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/enumerate_example.py)
 63 | | set.intersection: Find the Intersection Between 2 Sets | [link](https://mathdatasimplified.com/2021/06/29/set-intersection-find-the-intersection-between-2-sets/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/python/set_intersection.py)
 64 | | Set Difference: Find the Difference Between 2 Sets | [link](https://mathdatasimplified.com/2021/08/26/set-difference-find-the-difference-between-2-sets/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/python/set_difference.py)
 65 | | Difference between list append and list extend | [link](https://mathdatasimplified.com/2021/07/27/difference-between-list-append-and-list-extend/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/python/list_extend.py)
 66 | | map method: Apply a Function to Each Item of an Iterable | [link](https://mathdatasimplified.com/2021/08/19/map-method-apply-a-function-to-each-item-of-an-iterable/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/python/list_map.py)
 67 | | Why Should you Rewrite a For-Loop as a List Comprehension? | [link](https://mathdatasimplified.com/2021/08/17/why-should-you-rewrite-a-for-loop-as-a-list-comprehension/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/python/list_comprehension.py)
 68 | 
 69 | ### Tuple
 70 | 
 71 | | Title        | Explanation | Code  |
 72 | | ------------- |:-------------:| :-----:|
 73 |  | namedtuple: A Lightweight Python Structure to Manage your Data | [link](https://mathdatasimplified.com/2021/02/22/namedtuple-a-lightweight-python-structure-to-mange-your-data/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/namedtuple_example.py)
 74 |   | slice: Make your Indices more Readable by Naming your Slice | [link](https://mathdatasimplified.com/2021/02/16/slice-make-your-indices-more-readable-by-naming-your-slice/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/slice_example.py)
 75 | 
 76 | ### Dictionary
 77 | 
 78 | | Title        | Explanation | Code  |
 79 | | ------------- |:-------------:| :-----:|
 80 | | Defaultdict: Return a default value when a key is not available | [link](https://mathdatasimplified.com/2020/12/09/how-to-return-a-default-value-when-a-key-is-not-in-python-dictionary/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/collections_defaultdict.py)
 81 | | Defaultdict: Create a Dictionary with Values that are Lists | [link](https://mathdatasimplified.com/2021/07/22/defaultdict-create-a-dictionary-with-the-values-that-are-list/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/python/defaultdict_example.py)
 82 | | Ordered dictionary in Python | [link](https://mathdatasimplified.com/2020/11/23/ordered-dictionary-in-python/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/collections_ordereddict.py)
 83 | 
 84 | ### Datetime
 85 | 
 86 | | Title        | Explanation | Code  |
 87 | | ------------- |:-------------:| :-----:|
 88 | | datetime + timedelta: Calculate End DateTime based on Start DateTime and Duration | [link](https://mathdatasimplified.com/2021/03/04/datetime-timedelta-calculate-end-datetime-based-on-start-datetime-and-duration/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/datetime_timedelta.py)
 89 |  | Use Dates in a Month as the Feature | [link](https://mathdatasimplified.com/2020/11/23/use-dates-in-a-month-as-the-feature/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/dates_in_month_as_feature.py)
 90 | 
 91 | ### Function
 92 | 
 93 | | Title        | Explanation | Code  |
 94 | | ------------- |:-------------:| :-----:|
 95 | | *iterator: Pass Values of an Iterator to a Function | [link](https://mathdatasimplified.com/2021/05/05/iterator-pass-values-of-an-iterator-to-a-function/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/args_example.py)
 96 | | Use Python Built-in Functions to Speed your Code | [link](https://mathdatasimplified.com/2021/01/29/use-python-built-in-functions-to-speed-your-code/) |[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/built_in_functions_speed.py)
 97 | | **kwargs: Pass multiple arguments to a function in Python |[link](https://mathdatasimplified.com/2020/12/26/kwargs-pass-multiple-arguments-to-a-function-in-python/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/kwargs.py)
 98 | | Return Multiple Values from a Function Using Python Dictionary | [link](https://mathdatasimplified.com/2020/12/11/return-multiple-values-from-a-function-using-python-dictionary/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/return_multiple_values_with_dictionary.py)
 99 | | Decorator in Python| [link](https://mathdatasimplified.com/2020/11/25/decorator-in-python/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/decorator_example.py)
100 | | functools.partial: Generate a New Function with Fewer Arguments | [link](https://mathdatasimplified.com/2021/07/06/functools-partial-generate-a-new-function-with-fewer-arguments/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/python/partial_function.py)
101 | | singledispatch: Call Another Function Based on the Type of the Current Function’s Argument | [link](https://mathdatasimplified.com/2021/09/02/singledispatch-call-another-function-based-on-the-type-of-the-current-functions-argument/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/python/singledispatch_example.py)
102 | 
103 | ### Classes
104 | 
105 | | Title        | Explanation | Code  |
106 | | ------------- |:-------------:| :-----:|
107 | | Abstract Classes: Declare Methods without Implementation | [link](https://mathdatasimplified.com/2021/06/08/abstract-classes-declare-methods-without-implementation/)  | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/abc_example.py) |
108 | | classmethod: What is it and When to Use it | [link](https://mathdatasimplified.com/2021/04/24/classmethod-what-is-it-and-when-to-use-it/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/classmethod_example.py)
109 |  | getattr: a Better Way to Get the Attribute of a Class | [link](https://mathdatasimplified.com/2021/02/23/getattr-a-better-way-to-get-the-attribute-of-a-class/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/getattr_example.py)
110 |  | `__call__`: You can Call your Class Instance like a Function. Here is how | [link](https://mathdatasimplified.com/2021/01/22/__call__-you-can-call-your-class-instance-like-a-function-here-is-how/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/__call__example.py)
111 |  | Static method: use the function without adding the attributes required for a new instance | [link](https://mathdatasimplified.com/2020/11/23/static-method-use-the-function-without-adding-the-attributes-required-for-a-new-instance/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/staticmethod_example.py)
112 |  | Property Decorator: A Pythonic Way to Use Getters and Setters | [link](https://mathdatasimplified.com/2021/07/01/property-decorator-a-pythonic-way-to-use-getters-and-setters/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/python/property_decorator.py)
113 | 
114 | ### Files
115 | 
116 | | Title        | Explanation | Code  |
117 | | ------------- |:-------------:| :-----:|
118 | | Shutil: Move Files in Python | [link](https://mathdatasimplified.com/2021/06/03/shutil-move-files-in-python/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/move_files/move_files.py)
119 | | pathlib.Path | [link](https://mathdatasimplified.com/2020/11/23/pathlith-path/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/pathlib_path)
120 | | pathlib: Create, Write, and Rename Files in One Line of Code | [link](https://mathdatasimplified.com/2021/02/14/pathlib-create-write-and-rename-files-in-one-line-of-code/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/pathlib_write)
121 | | Pathlib: Iterate Over All Files that End with ‘.csv’ in a Directory | [link](https://mathdatasimplified.com/2020/12/31/pathlib-iterate-over-all-files-that-end-with-csv-in-a-directory/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/pathlib_iterate_files_end_with)
122 | | Path.parents: Get the Parent Directory of a File | [link](https://mathdatasimplified.com/2021/06/24/path-parents-get-the-parent-directory-of-a-file/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/path_parents) 
123 | | How to Improve the Readability of your JSON file using Indent| [link](https://mathdatasimplified.com/2021/04/27/how-to-improve-the-readability-of-your-json-file-using-indent/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/improve_json_readability.py)
124 | | `__main__.py`: Run a Directory like a Main Script | [link](https://mathdatasimplified.com/2021/03/15/__main__-py-run-a-directory-like-a-main-script/) |[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/main_example)
125 | 
126 | ### Error handling
127 | 
128 | | Title        | Explanation | Code  |
129 | | ------------- |:-------------:| :-----:|
130 | | Assert in Python: Output a Customized Message When the Assertion Fails | [link](https://mathdatasimplified.com/2021/04/13/assert-in-python-output-a-customized-message-when-the-assertion-fails/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/assert_customize_message.py)
131 | | warnings: Ignore Warnings when Running Python Code | [link](https://mathdatasimplified.com/2021/03/11/warnings-ignore-warnings-when-running-python-code/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/warnings_example.py)
132 | 
133 | ### Interact with Terminal
134 | 
135 | | Title        | Explanation | Code  |
136 | | ------------- |:-------------:| :-----:|
137 | | How to Execute Shell Commands in a Python Script | [link](https://mathdatasimplified.com/2021/04/10/how-to-execute-shell-commands-in-a-python-script/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/os_system.py)
138 | | argparse: Python Library to Parse Arguments from Command Line | [link](https://mathdatasimplified.com/2020/12/23/argparse-python-library-to-parse-arguments-from-command-line/)| [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/argparse_example.py)
139 | 
140 | ### Best Practices
141 | 
142 | | Title        | Explanation | Code  |
143 | | ------------- |:-------------:| :-----:|
144 | | Stop Writing Code Comments. Use Meaningful Names Instead | [link](https://mathdatasimplified.com/2021/01/14/stop-writing-code-comments-use-meaningful-names-instead/) 
145 | | Underscore(_): Ignore values that will not be used | [link](https://mathdatasimplified.com/2020/12/25/underscore_-ignore-values-that-will-not-be-used/)| [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/ignore_variables.py)
146 | | Underscore “_”: Ignore the index in Python for loops | [link](https://mathdatasimplified.com/2020/12/20/underscore-_-ignore-the-index-in-python-for-loops/)|  [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/underscore_for_loop.py)
147 | | Save Immediate Output when an Error Occurs | [link](https://mathdatasimplified.com/2020/12/10/save-immediate-output-when-an-error-occurs/)
148 | | Print error without stopping the for loop in Python | [link](https://mathdatasimplified.com/2020/12/06/print-error-without-stopping-the-for-loop-in-python/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/print_error.py)
149 | | Python Pass Statement | [link](https://mathdatasimplified.com/2020/12/02/python-pass-statement/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/pass_statement.py)
150 |  | Type hint in Python 3.9 | [link](https://mathdatasimplified.com/2020/11/23/type-hint-in-python-3-9/)
151 | 
152 | ### Code Speed
153 |  
154 |  | Title        | Explanation | Code  |
155 | | ------------- |:-------------:| :-----:|
156 |  | Concurrently execute tasks on separate CPUs | [link](https://mathdatasimplified.com/2020/11/23/concurrently-execute-tasks-on-separate-cpus/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/multiprocessing_example.py)
157 |  | Compare the execution time between 2 functions |[link](https://mathdatasimplified.com/2020/11/23/compare-the-execution-time-between-2-functions/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/compare_execution_time.py)
158 | 
159 | 
160 | <h1 id='pandas'> Pandas <img src="images/panda.png"> </h1>
161 | 
162 | ### Change Values
163 | 
164 | | Title        | Explanation | Code  |
165 | | ------------- |:-------------:| :-----:|
166 | | pd.DataFrame.agg: Aggregate over Columns or Rows Using Multiple Operations | [link](https://mathdatasimplified.com/2021/05/09/pd-dataframe-agg-aggregate-over-columns-or-rows-using-multiple-operations/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/pd_dataframe_agg.py)
167 | | pandas.DataFrame.agg: Apply Different Aggregations to Different Columns | [link](https://mathdatasimplified.com/2021/07/29/pandas-dataframe-agg-apply-different-aggregations-to-different-columns/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/pandas/df_agg.py)
168 | | DataFrame.pipe: Increase the Readability of your Code when Applying Multiple Functions to a DataFrame | [link](https://mathdatasimplified.com/2021/04/20/dataframe-pipe-increase-the-readability-of-your-code-when-applying-multiple-functions-to-a-dataframe/) |[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/dataframe_pipe.py)
169 | | pd.Series.map: Change Values of a Pandas Series Using a Dictionary | [link](https://mathdatasimplified.com/2021/05/21/pd-series-map-change-values-of-a-pandas-series-using-a-dictionary/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/pandas/pd_Series_map.py)
170 | | pd.Series.str: Manipulate Text Data in a pandas Series | [link](https://mathdatasimplified.com/2021/04/03/pd-series-str-manipulate-text-data-in-a-pandas-series/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/pd_series_str.py)
171 | | set_categories in pandas: Sort Categorical Column by a Specific Ordering | [link](https://mathdatasimplified.com/2021/02/09/set_categories-in-pandas-how-to-sort-categorical-column-by-a-specific-ordering/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/set_categories.py)
172 | | parse_dates: Convert Columns into Datetime When Using Pandas to Read CSV Files | [link](https://mathdatasimplified.com/2021/01/02/parse_dates-convert-columns-into-datetime-when-using-pandas-to-read-csv-files/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/parse_dates.py)
173 | | Filter Rows only if Column Contains Values from another List | [link](https://mathdatasimplified.com/2020/12/19/filter-rows-only-if-column-contains-values-from-another-list/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/s.is_in.py)
174 | | Specify suffixes when using df.merge() | [link](https://mathdatasimplified.com/2020/12/01/specify-suffixes-when-using-df-merge/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/df_merge.py)
175 | | Specify the datatype to speed up your code and reduce memory | [link](https://mathdatasimplified.com/2020/11/23/specify-the-datatype-to-speed-up-your-code-and-reduce-memory/)
176 | | Highlight your pandas DataFrame | [link](https://mathdatasimplified.com/2020/11/23/highlight-your-pandas-dataframe/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/highlight_pandas.ipynb)
177 | | Assign Values to Multiple New Columns | [link](https://mathdatasimplified.com/2020/11/23/assign-values-to-multiple-new-columns/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/df_assign.py)
178 | | Reduce pd.DataFrame’s Memory | [link](https://mathdatasimplified.com/2020/11/23/reduce-pd-dataframes-memory/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/reduce_memory.py)
179 | | pd.DataFrame.explode: Transform Each Element in an Iterable to a Row | [link](https://mathdatasimplified.com/2021/07/08/pd-dataframe-explode-transform-each-element-in-an-iterable-to-a-row/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/pandas/df_explode.py)
180 | | pandas.cut: Bin a DataFrame’s values into Discrete Intervals | [link](https://mathdatasimplified.com/2021/07/13/pandas-cut-bin-a-dataframes-values-into-discrete-intervals/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/pandas/pd_cut.py)
181 | | Forward Fill in Pandas: Use the Previous Value to Fill the Current Missing Value | [link](https://mathdatasimplified.com/2021/08/20/forward-fill-use-the-previous/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/pandas/df_fillna.py)
182 | | pandas.pivot_table: Turn Your DataFrame Into a Pivot Table | [link](https://mathdatasimplified.com/2021/08/24/pandas-pivot_table-turn-your-dataframe-into-a-pivot-table/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/pandas/df_pivot.py)
183 | 
184 | ### Get Values
185 | 
186 | | Title        | Explanation | Code  |
187 | | ------------- |:-------------:| :-----:|
188 | | df.columns.str.startswith: Find DataFrame’s Columns that Start with a Pattern | [link](https://mathdatasimplified.com/2021/05/27/df-columns-str-startswith-find-dataframes-columns-that-start-with-a-pattern/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/df_columns_str_start_with.py)
189 | | pandas.DataFrame.iterrows: Iterate over Rows of a DataFrame | [link](https://mathdatasimplified.com/2021/06/15/pandas-dataframe-iterrows-iterate-over-rows-of-a-dataframe/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/pd_dataframe_iterrows.py)
190 | | pandas.Series.dt: Access Datetime Properties of pandas Series | [link](https://mathdatasimplified.com/2021/05/13/pandas-series-dt-access-datetime-properties-of-pandas-series/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/pd_series_dt.py)
191 | | pd.Series.between: Select Rows in a pandas Series Containing Values between 2 Numbers | [link](https://mathdatasimplified.com/2021/03/03/pd-series-between-obtain-the-rows-with-values-lie-between-2-numbers/) |[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/pd_series_between.py)
192 | | DataFrame rolling: Find the average of the previous n datapoints using Pandas | [link](https://mathdatasimplified.com/2021/01/31/dataframe-rolling-find-the-average-of-the-previous-n-datapoints-using-pandas/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/df_rolling.py)
193 | | select_dtypes: Return a subset of a DataFrame including/excluding columns based on their dtype | [link](https://mathdatasimplified.com/2021/01/26/select_dtypes-return-a-subset-of-a-dataframe-including-excluding-columns-based-on-their-dtype/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/select_dtypes.py)
194 | | pct_change: Find the percentage change between the current and a prior element in a pandas Series | [link](https://mathdatasimplified.com/2021/01/19/pct_change-find-the-percentage-change-between-the-current-and-a-prior-element-in-a-pandas-series/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/pct_change.py)
195 | | DataFrame.diff and DataFrame.shift: Take the Difference between Rows within a Column in Pandas | [link](https://mathdatasimplified.com/2021/01/07/dataframe-diff-and-dataframe-shift-take-the-difference-between-rows-within-a-column-in-pandas/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/df_diff.py)
196 | | Pandas DataFrame: How to select all columns that start with a word | [link](https://mathdatasimplified.com/2020/11/27/pandas-dataframe-how-to-select-all-columns-that-start-with-a-word/) |[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/select_columns_start_with.py)
197 | | Exclude Outliers | [link](https://mathdatasimplified.com/2020/11/23/exclude-outliers/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/exclude_outliers.py)
198 | | Pandas DataFrame Get Data in a Year Range | [link](https://mathdatasimplified.com/2020/11/23/pandas-dataframe-get-data-in-a-year-range/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/get_data_in_year_range.py)
199 | | pd.reindex: Replace the Values of the Missing Dates with 0 | [link](https://mathdatasimplified.com/2021/07/20/pd-reindex-replace-the-values-of-the-missing-dates-with-0/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/pandas/pd_reindex.py)
200 | | Select DataFrame Rows Before or After a Specific Date | [link](https://mathdatasimplified.com/2021/07/23/select-dataframe-rows-before-or-after-a-specific-date/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/pandas/df_datetime_comparison.py)
201 | | DataFrame.groupby.sample: Get a Random Sample of Items from Each Category in a Column | [link](https://mathdatasimplified.com/2021/08/10/dataframe-groupby-sample-get-a-random-sample-of-items-from-each-category-in-a-column/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/pandas/pd_groupby_sample.py)
202 | 
203 | ### Testing
204 | 
205 | | Title        | Explanation | Code  |
206 | | ------------- |:-------------:| :-----:|
207 | | assert_frame_equal: Test whether Two DataFrames are Similar | [link](https://mathdatasimplified.com/2021/04/15/assert_frame-equal-test-whether-two-dataframes-are-similar/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/assert_frame_equal.py)
208 | 
209 | 
210 | 
211 | 
212 | <h1 id='numpy'> Numpy <img src="images/numpy.png"> </h1>
213 | 
214 | | Title        | Explanation | Code  |
215 | | ------------- |:-------------:| :-----:|
216 | | np.ravel: Flatten a Numpy Array | [link](https://mathdatasimplified.com/2021/05/18/np-ravel-flatten-a-numpy-array/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/numpy/np_ravel.py)
217 | | Use List to Change the Positions of Rows or Columns in a Numpy Array | [link](https://mathdatasimplified.com/2021/05/07/use-list-to-change-the-positions-of-rows-or-columns-in-a-numpy-array/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/numpy/use_list_to_change_position_of_arrays.py)
218 | | Key Parameter in Max(): Find the Key with the Largest Value | [link](https://mathdatasimplified.com/2021/02/19/key-parameter-in-max-find-the-key-with-the-largest-value/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/key_in_max.py)
219 | | Difference between Numpy’s All and Any Methods | [link](https://mathdatasimplified.com/2021/03/31/difference-between-numpys-all-and-any-methods/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/numpy/any_all.py)
220 | | Double np.argsort: Get Rank of Values in an Array | [link](https://mathdatasimplified.com/2021/01/03/double-np-argsort-get-rank-of-values-in-an-array/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/numpy/double_np_argsort.py)
221 | | Get the index of the max value in a Numpy array | [link](https://mathdatasimplified.com/2020/12/15/get-the-index-of-the-max-value-in-a-numpy-array/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/numpy/np_argmax.py)
222 | | np.all: Test Whether All Elements along a Given Axis of a NumPy Array Evaluate to True | [link](https://mathdatasimplified.com/2021/06/22/np-all-test-whether-all-elements-along-a-given-axis-of-a-numpy-array-evaluate-to-true/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/numpy/np_array_all.py)
223 | | np.where: Replace Elements of a NumPy Array Based on a Condition | [link](https://mathdatasimplified.com/2021/03/20/np-where-transform-values-of-a-numpy-array-using-conditions/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/numpy/np_where.py)
224 | | array-to-latex: Turn a Numpy Array into Latex | [link](https://mathdatasimplified.com/2021/06/23/array_to_latex-turn-a-numpy-array-into-latex/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/numpy/array_to_latex_example.ipynb)
225 | | Numpy Comparison Operators | [link](https://mathdatasimplified.com/2021/07/15/numpy-comparison-operators/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/numpy/np_comparison.py)
226 | | NumPy.linspace: Get Evenly Spaced Numbers Over a Specific Interval | [link](https://mathdatasimplified.com/2021/08/05/numpy-linspace-get-evenly-spaced-numbers-over-a-specific-interval/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/numpy/np_linspace.py)
227 | | NumPy.testing.assert_almost_equal: Check If Two Arrays Are Equal up to a Certain Precision | [link](https://mathdatasimplified.com/2021/08/12/numpy-testing-assert_almost_equal-check-if-two-arrays-are-equal-up-to-a-certain-precision/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/numpy/numpy_testing_almost_equal.py)
228 | 
229 | 
230 | <h1 id='data-science-tools'> Data Science Tools <img src="images/data-science.png"> </h1>
231 | 
232 | ### Testing
233 | 
234 | | Title        | Explanation | Code  |
235 | | ------------- |:-------------:| :-----:|
236 | | snoop: Smart Print to Debug your Python Function | [link](https://mathdatasimplified.com/2021/05/28/snoop-smart-print-to-debug-your-python-function/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/snoop_example.py)
237 | | pytest benchmark: A Pytest Fixture to Benchmark your Code | [link](https://mathdatasimplified.com/2021/05/19/pytest-benchmark-a-pytest-fixture-to-benchmark-your-code/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/pytest_benchmark.py)
238 | | pytest.mark.parametrize: Test your Functions with Multiple Inputs | [link](https://mathdatasimplified.com/2021/06/09/pytest-mark-parametrize-test-your-functions-with-multiple-inputs/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/pytest_parametrize.py)
239 | | Pytest: Shows only Failed Tests | [link](https://mathdatasimplified.com/2021/01/27/pytest-shows-only-failed-tests/) 
240 | | Pytest Fixtures: Use the same data for different tests | [link](https://mathdatasimplified.com/2020/12/05/pytest-fixtures-use-the-same-data-for-different-tests/)| [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/pytest_fixture.py)
241 | |Pytest repeat | [link](https://mathdatasimplified.com/2020/11/23/pytest-repeat/)|[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/pytest_repeat.py)
242 | | Pandera: a Python Library to Validate Your Pandas DataFrame | [link](https://mathdatasimplified.com/2021/01/17/pandera-a-python-library-to-validate-your-pandas-dataframe/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/pandera_example.py)
243 | 
244 | ### Data
245 | 
246 | | Title        | Explanation | Code  |
247 | | ------------- |:-------------:| :-----:|
248 | | faker: Create Fake Data in One Line of Code |[link](https://mathdatasimplified.com/2021/05/14/faker-create-fake-data-in-one-line-of-code/)|[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/faker_example.py)
249 | | DVC: A Data Version Control Tool for your Data Science Projects | [link](https://mathdatasimplified.com/2021/05/06/dvc-a-data-version-control-tool-for-your-data-science-projects/)| [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/dvc_example.sh)
250 | | fetch_openml: Get OpenML’s Dataset in One Line of Code | [link](https://mathdatasimplified.com/2021/04/23/fetch_openml-get-openmls-dataset-in-one-line-of-code/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/fetch_openml_example.py)
251 | | github-to-sqlite: Download the Data of your Starred GitHub Repositories in One Command Line | [link](https://mathdatasimplified.com/2021/03/30/github-to-sqlite-download-the-data-of-your-starred-github-repositories-in-one-command-line/)
252 | | Autoscraper | [link](https://mathdatasimplified.com/2020/11/23/autoscraper/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/autoscraper_example.py)
253 | | Extract series data from various Internet sources directly into a pandas DataFrame | [link](https://mathdatasimplified.com/2020/11/23/extract-series-data-from-various-internet-sources-directly-into-a-pandas-dataframe/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/extract_various_data.py)
254 | | Compare the similar features between 2 different datasets | [link](https://mathdatasimplified.com/2020/11/23/compare-the-similar-features-between-2-different-datasets/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/compare_2_datasets)
255 | | newspaper3k: Extract Meaningful Information From an Article in 2 Lines of Code | [link](https://mathdatasimplified.com/2021/03/23/newspaper3k-extract-meaningful-information-from-an-articles-in-2-lines-of-code/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/data_science_tools/newspaper3k.ipynb)
256 | | distfit: Find The Best Theoretical Distribution For Your Data in Python | [link](https://mathdatasimplified.com/2021/09/08/distfit-find-the-best-theoretical-distribution-for-your-data-in-python/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/data_science_tools/distfit_example.py)
257 | 
258 | ### Feature Extraction
259 | 
260 | | Title        | Explanation | Code  |
261 | | ------------- |:-------------:| :-----:|
262 | | datefinder: Automatically Find Dates and Time in a Python String | [link](https://mathdatasimplified.com/2021/05/08/datefinder-automatically-find-dates-and-time-in-a-python-string/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/datefinder_example.py)
263 | | dill’s getname: Get the Name of a Python Object | [link](https://mathdatasimplified.com/2021/04/29/dills-getname-get-names-a-python-object/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/getname_example.py)
264 | | pytrend: Get the Trend of a Keyword on Google Search Over Time | [link](https://mathdatasimplified.com/2021/04/12/pytrend-get-the-trend-of-a-keyword-on-google-search-over-time/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/pytrends_example.ipynb)
265 | | add_datepart: Add Relevant DateTime Features in One Line of Code | [link](https://mathdatasimplified.com/2021/02/11/add_datepart-add-relevant-datetime-features-in-one-line-of-code/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/add_datepart_example.py)
266 | | Geopy: Extract Location Based on Python String | [link](https://mathdatasimplified.com/2020/12/08/geopy-extract-location-based-on-python-string/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/geopy_example.py)
267 | | Maya: Convert the string to datetime automatically | [link](https://mathdatasimplified.com/2020/11/23/maya-convert-the-string-to-datetime-automatically/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/maya_example.py)
268 | | Select the features by their relevance | [link](https://mathdatasimplified.com/2020/11/23/select-the-features-by-their-relevance/) 
269 | | Extract holiday from date column | [link](https://mathdatasimplified.com/2020/11/23/extract-holiday-from-date-column/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/extract_holidays.py)
270 | | fastai’s cont_cat_split: Get a DataFrame’s Continuous and Categorical Variables Based on Their Cardinality | [link](https://mathdatasimplified.com/2021/07/16/fastais-cont_cat_split-get-a-dataframes-continuous-and-categorical-variables-based-on-their-cardinality/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/data_science_tools/fastai_cont_cat_split.py)
271 | 
272 | ### Visualization
273 | 
274 | | Title        | Explanation | Code  |
275 | | ------------- |:-------------:| :-----:|
276 | | D-Tale: A Python Library to Visualize and Analyze your Data Without Code | [link](https://mathdatasimplified.com/2021/05/16/d-tale-a-python-library-to-visualize-and-analyze-your-data-without-code/)
277 | | Graphviz: Create a Flowchart to Capture your Ideas in Python | [link](https://mathdatasimplified.com/2021/02/06/graphviz-create-a-flowchart-to-capture-your-ideas-in-python/) |[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/terminal/environment_variables)
278 | | Create an interactive map in Python | [link](https://mathdatasimplified.com/2020/12/03/create-an-interactive-map-in-python/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/folium_example.ipynb)
279 | | dtreeviz: Visualize and Interpret a Decision Tree Model | [link](https://mathdatasimplified.com/2021/09/01/dtreeviz-visualize-and-interpret-a-decision-tree-model/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/data_science_tools/dtreeviz_example.py)
280 | 
281 | ### Sharing and Downloading
282 | 
283 | | Title        | Explanation | Code  |
284 | | ------------- |:-------------:| :-----:|
285 | | Datapane: Publish your Python Objects on the Web in 2 Lines of Code | [link](https://mathdatasimplified.com/2021/04/25/datapane-publish-your-python-objects-on-the-web-in-2-lines-of-code/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/datapane_example.py)
286 | | gdown: Download a File from Google Drive in Python | [link](https://mathdatasimplified.com/2021/01/04/gdown-download-a-file-from-google-drive-in-python/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/gdown_example.py)
287 | 
288 | ### Natural Language Processing
289 | 
290 | | Title        | Explanation | Code  |
291 | | ------------- |:-------------:| :-----:|
292 | | TextBlob: Processing Text in One Line of Code | [link](https://mathdatasimplified.com/2021/04/16/textblob-processing-text-in-one-line-of-code/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/textblob_example.py)
293 | | sumy: Summarize Text in One Line of Code | [link](https://mathdatasimplified.com/2021/03/10/sumy-summarize-text-in-one-line-of-code/)
294 | | Spacy_streamlit: Create a Web App to Visualize your Text in 3 Lines of Code | [link](https://mathdatasimplified.com/2020/12/29/spacy_streamlit-create-a-web-app-to-visualize-your-text-in-3-lines-of-code/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/streamlit_app.py)
295 | | Extract a contiguous sequence of 2 words | [link](https://mathdatasimplified.com/2020/11/23/extract-a-contiguous-sequence-of-2-words/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/spacy_ngram.py)
296 | | Detect the “almost similar” articles | [link](https://mathdatasimplified.com/2020/11/23/detect-the-almost-similar-articles/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/difflib_example.py)
297 | | Convert number to words | [link](https://mathdatasimplified.com/2020/11/23/convert-number-to-words/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/convert_number_to_words.py)
298 | | texthero.clean: Preprocess Text in One Line of Code | [link](https://mathdatasimplified.com/2021/07/30/texthero-clean-preprocess-text-in-one-line-of-code/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/data_science_tools/texthero_examples.py)
299 | | wordfreq: Estimate the Frequency of a Word in 36 Languages | [link](https://mathdatasimplified.com/2021/09/10/wordfreq-estimate-the-frequency-of-a-word-in-36-languages/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/data_science_tools/wordfreq_example.py)
300 | 
301 | ### Tools for Best Python Practices
302 | 
303 | | Title        | Explanation | Code  |
304 | | ------------- |:-------------:| :-----:|
305 | | Don’t Hard-Code. Use Hydra Instead | [link](https://mathdatasimplified.com/2021/04/08/dont-hard-code-use-hydra-instead/) |[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/hydra_example)
306 | | python-dotenv: How to Load the Secret Information from .env File | [link](https://mathdatasimplified.com/2021/02/20/python-dotenv-how-to-load-the-secret-information-from-env-file/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/dotenv_example)
307 | | kedro pipeline: Create Pipeline for your Data Science Projects in Python | [link](https://mathdatasimplified.com/2021/02/03/kedro-pipeline-create-pipeline-for-your-data-science-projects-in-python/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/kedro_example.py)
308 | | docopt: Create Beautiful Command-line Interfaces for Documentation in Python | [link](https://mathdatasimplified.com/2021/03/18/docopt-create-beautiful-command-line-interfaces-for-documentation-in-python/)| [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/docopt_example.py)
309 | 
310 | ### Speed Up Code
311 | 
312 | | Title        | Explanation | Code  |
313 | | ------------- |:-------------:| :-----:|
314 | | fastai’s df_shrink: Shrink DataFrame’s Memory Usage in One Line of Code | [link](https://mathdatasimplified.com/2021/02/24/fastais-df_shrink-shrink-dataframes-memory-usage-in-one-line-of-code/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/fastai_df_shrink.py)
315 | | Swifter: Add One Word to Make your Pandas Apply 23 Times Faster | [link](https://mathdatasimplified.com/2021/01/13/swifter-add-one-word-to-make-your-pandas-apply-23-times-faster/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/swifter_example.py)
316 | 
317 | ### Better Pandas
318 | 
319 | | Title        | Explanation | Code  |
320 | | ------------- |:-------------:| :-----:| 
321 | | rich-dataframe: Create Animated and Colorful Pandas Dataframe | [link](https://mathdatasimplified.com/2021/02/17/rich-dataframe-create-animated-and-colorful-pandas-dataframe/) 
322 | | tqdm: Add Progress Bar to your Pandas Apply | [link](https://mathdatasimplified.com/2020/12/30/tqdm-add-progress-bar-to-your-pandas-apply/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/tqdm_example.py)
323 | | tqdm.set_description: Set a Description for Your Progress Bar | [link](https://mathdatasimplified.com/2021/08/18/tqdm-set_description-set-a-description-for-your-progress-bar/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/cool_tools/tqdm_set_description.py)
324 | 
325 | ### Machine Learning
326 | 
327 | | Title        | Explanation | Code  |
328 | | ------------- |:-------------:| :-----:| 
329 | | causalimpact: Find Causal Relation of an Event and a Variable in Python | [link](https://mathdatasimplified.com/2021/01/25/causalimpact-find-causal-relation-of-an-event-and-a-variable-in-python/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/causalimpact_example.ipynb)
330 | | Pipeline + GridSearchCV: Prevent Data Leakage when Scaling the Data | [link](https://mathdatasimplified.com/2020/12/27/pipeline-gridsearchcv-prevent-data-leakage-when-scaling-the-data/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/pipeline_gridsearchcv.py)
331 | | Decompose high dimensional data into two or three dimensions | [link](https://mathdatasimplified.com/2020/11/23/decompose-high-dimensional-data-into-two-or-three-dimensions/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/decompose_high_dementional_data.ipynb)
332 | | Cross Validation with Time Series | [link](https://mathdatasimplified.com/2020/11/23/cross-validation-with-time-series/)
333 | | squared=False: Get RMSE from Sklearn’s mean_squared_error method | [link](https://mathdatasimplified.com/2021/08/13/squaredfalse-get-rmse-from-sklearns-mean_squared_error-method/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/data_science_tools/sklearn_rmse.py)
334 | 
335 | <h1 id='terminal'> Terminal <img src="images/command-window.png"> </h1>
336 | 
337 | ### Text
338 | 
339 | | Title        | Explanation | Code  |
340 | | ------------- |:-------------:| :-----:|
341 | | tr Command: Translate Characters to Improve Readability In Unix/Linux | [link](https://mathdatasimplified.com/2021/04/05/tr-command-translate-characters-to-improve-readability-in-unix-linux/) |[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/terminal/tr_command.sh)
342 | | Sed Command: Replace a string with another string on the command line | [link](https://mathdatasimplified.com/2020/12/17/sed-command-replace-a-string-with-another-string-on-the-command-line/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/terminal/sed_command)
343 | 
344 | ### Files
345 | 
346 | | Title        | Explanation | Code  |
347 | | ------------- |:-------------:| :-----:|
348 | | fd: a Simple Tool to Search for Files or Directories Fast | [link](https://mathdatasimplified.com/2021/04/09/fd-a-simple-tool-to-search-for-files-or-directories-fast/)
349 | | ln -s: Create Symbolic Link Between 2 Files | [link](https://mathdatasimplified.com/2021/04/11/ln-s-create-symbolic-link-between-2-files/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/terminal/create_symbolic_link.sh)
350 | | tee: Save Command Output to a File | [link](https://mathdatasimplified.com/2021/03/06/tee-save-command-output-to-a-file/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/terminal/tee_example.sh)
351 | | Make Important Files Impossible to be Deleted | [link](https://mathdatasimplified.com/2021/01/15/make-important-files-impossible-to-be-deleted/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/terminal/make_file_immutable.sh)
352 | | View tree structure of your file | [link](https://mathdatasimplified.com/2020/11/23/view-tree-structure-of-your-file/)
353 | 
354 | ### Tracking
355 | 
356 | | Title        | Explanation | Code  |
357 | | ------------- |:-------------:| :-----:|
358 | | timeit on the Command Line: Measure Execution Time of Small Code Snippets | [link](https://mathdatasimplified.com/2021/05/25/timeit-on-the-command-line-measure-execution-time-of-small-code-snippets/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/terminal/timeit_command_line.sh)
359 | | Time Command: Track the Time it Takes to Execute a File in Linux | [link](https://mathdatasimplified.com/2021/01/24/time-command-track-the-time-it-takes-to-execute-a-file-in-linux/)
360 | | htop | [link](https://mathdatasimplified.com/2020/11/23/htop/)
361 | 
362 | ### Python 
363 | 
364 | | Title        | Explanation | Code  |
365 | | ------------- |:-------------:| :-----:|
366 | | Python Shell as a Calculator: Grab the Last Output Using “_” | [link](https://mathdatasimplified.com/2021/03/16/python-shell-as-an-calculator-grab-the-last-output-using-_/)
367 | | Find version of a Python library using pip list and grep | [link](https://mathdatasimplified.com/2020/12/04/find-version-of-a-python-library-using-pip-list-and-grep/)
368 | | Conda rollback to the last revision | [link](https://mathdatasimplified.com/2020/11/23/conda-rollback-to-the-last-revision/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/terminal/conda_rollback.sh)
369 | | How to Check Whether a Library is Installed | [link](https://mathdatasimplified.com/2020/11/23/1006/) |[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/terminal/check_if_library_is_installed.sh)
370 | | pydash.chunk: Split Elements in a List into Groups of n Items | [link](https://mathdatasimplified.com/2021/08/11/pydash-chunk-split-elements-in-a-list-into-groups-of-n-items/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/cool_tools/pydash_chunk.py)
371 | ### Prettify Terminal
372 | 
373 | | Title        | Explanation | Code  |
374 | | ------------- |:-------------:| :-----:|
375 | | colorls: Beautify your ls Command with Color and Icons | [link](https://mathdatasimplified.com/2021/04/18/colorls-beautify-your-ls-command-with-color-and-icons/)
376 | | Colorama: Produce a colored terminal text in Python | [link](https://mathdatasimplified.com/2020/12/13/colorama-produce-a-colored-terminal-text-in-python/) 
377 | 
378 | ### Sharing
379 | 
380 | | Title        | Explanation | Code  |
381 | | ------------- |:-------------:| :-----:|
382 | | terminalizer: Record and Share your Terminal Sessions | [link](https://mathdatasimplified.com/2021/03/08/termanalizer-record-and-share-your-terminal-sessions/)
383 | 
384 | ### Productive Hacks
385 | 
386 | | Title        | Explanation | Code  |
387 | | ------------- |:-------------:| :-----:|
388 | | Bash For Loop: Stop Staring at your Screen. Write a Bash For Loop instead | [link](https://mathdatasimplified.com/2021/03/29/bash-for-loop-stop-staring-at-your-screen-write-a-bash-for-loop-instead/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/terminal/bash_for_loop.sh)
389 | | Environment Variables: Save Private Information in your Local Machine | [link](https://mathdatasimplified.com/2021/02/07/environment-variables-save-private-information-in-your-local-machine/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/terminal/environment_variables)
390 | | Pet: A Command-line Snippet Tool That Allows you to Store your Favorite Commands | [link](https://mathdatasimplified.com/2021/01/17/pet-a-command-line-snippet-tool-that-allows-you-to-store-your-favorite-commands/)
391 | | Loop through a list of data on your terminal | [link](https://mathdatasimplified.com/2020/11/23/loop-through-a-list-of-data-on-your-terminal/)
392 | | Multi-run command | [link](https://mathdatasimplified.com/2020/11/23/multi-run-command/) 
393 | | Run multiple commands in one line of code | [link](https://mathdatasimplified.com/2020/11/23/run-multiple-commands-in-one-line-of-code/)
394 | 
395 | <h1 id='cool-tools'> Cool Tools <img src="images/cool.png"> </h1>
396 | 
397 | ### Better Output
398 | 
399 | | Title        | Explanation | Code  |
400 | | ------------- |:-------------:| :-----:|
401 | | How to Strip Outputs and Execute Interactive Code in a Python Script | [link](https://mathdatasimplified.com/2021/05/12/how-to-strip-outputs-and-execute-interactive-code-in-a-python-script/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/cool_tools/strip_interactive_example.py)
402 | | rich.inspect: Produce a Beautiful Report on any Python Object | [link](https://mathdatasimplified.com/2021/04/28/rich-inspect-produce-a-beautiful-report-on-any-python-object/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/cool_tools/rich_inspect.py)
403 | | Rich’s Console: Debug your Python Function in One Line of Code | [link](https://mathdatasimplified.com/2021/02/12/richs-console-debug-your-python-function-in-one-line-of-code/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/cool_tools/rich_console.py)
404 | | loguru: Print Readable Traceback in Python | [link](https://mathdatasimplified.com/2021/01/23/loguru-print-readable-traceback-in-python/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/cool_tools/loguru_example.py)
405 | | Icecream: Adding a Datetime Stamp to Python print | [link](https://mathdatasimplified.com/2021/01/15/icecream-adding-a-datetime-stamp-to-python-print/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/cool_tools/icecream_datetime.py)
406 | | Icecream: Never use print() to debug again | [link](https://mathdatasimplified.com/2021/01/01/icrecream-never-use-print-to-debug-again/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/cool_tools/icecream_example.py)
407 | | Pyfiglet: Make large and unique letters out of ordinary text in Python | [link](https://mathdatasimplified.com/2020/12/22/pyfiglet-make-large-and-unique-letters-out-of-ordinary-text-in-python/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/cool_tools/pyfiglet_example.py)
408 | | heartrate — Visualize the Execution of a Python Program in Real-Time | [link](https://mathdatasimplified.com/2021/06/25/heartrate-visualize-the-execution-of-a-python-program-in-real-time/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/cool_tools/heart_rate.py)
409 | 
410 | ### Tracking
411 | 
412 | | Title        | Explanation | Code  |
413 | | ------------- |:-------------:| :-----:|
414 | | Stacer: Visualize the History of your CPU and Memory Usage | [link](https://mathdatasimplified.com/2021/05/02/stacer-visualize-the-history-of-your-cpu-and-memory-usage/)
415 | 
416 | ### Data
417 | 
418 | | Title        | Explanation | Code  |
419 | | ------------- |:-------------:| :-----:|
420 | | sherlock: Search for a Username Across 298 Popular Websites | [link](https://mathdatasimplified.com/2021/03/09/sherlock-search-for-a-username-across-298-popular-websites/)
421 | | getme forecast: Get the Weather Forecast Through your Terminal | [link](https://mathdatasimplified.com/2021/01/10/getme-forecast-get-the-weather-forecast-through-your-terminal/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/cool_tools/getme_forecast.sh)
422 | 
423 | ### Automation
424 | 
425 | | Title        | Explanation | Code  |
426 | | ------------- |:-------------:| :-----:|
427 | | notion-py: Access and Edit your Notion App Using Python | [link](https://mathdatasimplified.com/2021/04/01/notion-py-access-and-edit-your-notion-app-using-python/)| [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/cool_tools/notion_example.py)
428 | | organize: Automate Organizing Files with Command Line | [link](https://mathdatasimplified.com/2021/03/14/organize-automate-organizing-files-with-command-line/) 
429 | | Schedule: Schedule your Python Functions to Run At a Specific Time | [link](https://mathdatasimplified.com/2021/01/30/schedule-schedule-your-python-functions-to-run-at-a-specific-time/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/cool_tools/schedule_example.py)
430 | | notify-send: Send a Desktop Notification after Finishing Executing a File | [link](https://mathdatasimplified.com/2021/01/20/notify-send-send-a-desktop-notification-after-finishing-executing-a-file/) |[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/cool_tools/notify_send.sh)
431 | | isort: Automatically Sort your Python Imports in 1 Line of Code | [link](https://mathdatasimplified.com/2021/01/06/isort-automatically-sort-your-python-imports-in-1-line-of-code/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/cool_tools/isort_example.py)
432 | | knockknock: Receive an email when your code finishes executing | [link](https://mathdatasimplified.com/2020/11/23/knockknock-receive-an-email-when-your-code-finishes-executing/)| [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/cool_tools/knockknock_example.py)
433 | | snscrape: Scrape Social Networking Services in Python | [link](https://mathdatasimplified.com/2021/06/30/snsscrape-scrape-social-networking-services-in-python/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/cool_tools/snsscrape_example.sh)
434 | | Typer: Build a Command-Line Interface in a Few Lines of Code | [link](https://mathdatasimplified.com/2021/07/14/typer-build-a-command-line-interface-in-a-few-lines-of-code/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/cool_tools/box_example.py)
435 | | yarl: Create and Extract Elements from a URL Using Python | [link](https://mathdatasimplified.com/2021/07/21/yarl-create-and-extract-elements-from-a-url-using-python/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/cool_tools/yarl_example.py)
436 | | interrogate: Check your Python Code for Missing Docstrings | [link](https://mathdatasimplified.com/2021/08/06/interrogate-check-your-python-code-for-missing-docstrings/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/cool_tools/interrogate_example.py)
437 | | mypy: Static Type Checker for Python | [link](https://mathdatasimplified.com/2021/08/23/mypy-static-type-checker-for-python/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/cool_tools/mypy_example.py)
438 | 
439 | ### Git and GitHub
440 | 
441 | | Title        | Explanation | Code  |
442 | | ------------- |:-------------:| :-----:|
443 | | Github CLI: Brings GitHub to your Terminal | [link](https://mathdatasimplified.com/2021/02/21/github-cli-brings-github-to-your-terminal/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/terminal/gh_cli.sh)
444 | | Pull one file from another branch using git | [link](https://mathdatasimplified.com/2020/11/23/pull-one-file-from-another-branch-using-git/)
445 | | Download a file on Github using wget | [link](https://mathdatasimplified.com/2020/11/23/download-a-file-on-github-using-wget/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/terminal/download_github_file.sh)
446 | | github1s: Read GitHub Code with VS Code on your Browser in One Second | [link](https://mathdatasimplified.com/2021/02/15/github1s-read-github-code-with-vs-code-on-your-browser-in-one-second/)
447 | | PyGithub: Manage your Github resources using Python | [link](https://mathdatasimplified.com/2020/12/24/pygithub-manage-your-github-resources-using-python/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/cool_tools/pygithub_example.py)
448 | | Astral: Organize your Github stars with ease | [link](https://mathdatasimplified.com/2020/12/18/astral-organize-your-github-stars-with-ease/)
449 | | pip install -e: Install Forked GitHub Repository using Pip | [link](https://mathdatasimplified.com/2021/07/28/pip-install-e-install-forked-github-repository-using-pip/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/cool_tools/install_forked.sh)
450 | 
451 | ### Alternative Approach
452 | 
453 | | Title        | Explanation | Code  |
454 | | ------------- |:-------------:| :-----:|
455 | | Box: Using Dot Notation to Access Keys in a Python Dictionary | [link](https://mathdatasimplified.com/2021/03/02/box-using-dot-notation-to-access-keys-in-a-python-dictionary/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/cool_tools/box_example.py)
456 | | decorator module: Write Shorter Python Decorators without Nested Functions | [link](https://mathdatasimplified.com/2021/03/27/decorator-module-write-shorter-python-decorators-without-nested-functions/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/cool_tools/decorator_module.py)
457 | | virtualenv-clone: Create a Copy of a Virtual Environment | [link](https://mathdatasimplified.com/2021/02/01/virtualenv-clone-create-a-copy-of-a-virtual-environment/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/terminal/virtualenv_clone.sh)
458 | 
459 | <h1 id='jupyter-notebook'> Jupyter Notebook <img src="images/notebook.png"> </h1>
460 | 
461 | | Title        | Explanation | Code  |
462 | | ------------- |:-------------:| :-----:|
463 | | nbdime: Better Version Control for Jupyter Notebook | [link](https://mathdatasimplified.com/2021/06/04/nbdime-better-version-control-for-jupyter-notebook/) 
464 | | display in IPython: Display math equations in Jupyter Notebook | [link](https://mathdatasimplified.com/2021/03/01/display-in-ipython-display-math-equations-in-jupyter-notebook/) |[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/notebook/display_math_equations.ipynb)
465 | | Reuse the notebook to run the same code across different data | [link](https://mathdatasimplified.com/2020/11/23/reuse-the-notebook-to-run-the-same-code-across-different-data/)
466 | | ngrok: Create a Public Server for your Jupyter Notebook in 1 Line of Code | [link](https://mathdatasimplified.com/2021/05/26/ngrok-create-a-public-server-for-your-jupyter-notebook-in-1-line-of-code/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/cool_tools/ngrok_example.sh)
467 | | watermark: Get Information About Your Hardware and the Packages Being Used within Your Notebook | [link](https://mathdatasimplified.com/2021/07/07/watermark-get-information-about-your-hardware-and-the-packages-being-used-within-your-notebook/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/notebook/watermark_example.ipynb)
468 | 


--------------------------------------------------------------------------------