├── _config.yml ├── code_snippets ├── python │ ├── move_files │ │ ├── dir1 │ │ │ └── example.txt │ │ ├── dir2 │ │ │ └── .gitkeep │ │ └── move_files.py │ ├── pathlib_write │ │ ├── greeting │ │ └── path_test.py │ ├── path_parents │ │ ├── test1 │ │ │ └── test2 │ │ │ │ └── test.txt │ │ └── path_parents.py │ ├── pathlib_iterate_files_end_with │ │ ├── data │ │ │ ├── data1.csv │ │ │ ├── data2.csv │ │ │ └── data3.csv │ │ └── main.py │ ├── pathlib_path │ │ ├── data │ │ │ └── processed │ │ │ │ ├── data1.csv │ │ │ │ └── data2.csv │ │ └── main.py │ ├── main_example │ │ └── salulation │ │ │ └── __main__.py │ ├── warnings_example.py │ ├── any_example.py │ ├── underscore_large_number.py │ ├── dates_in_month_as_feature.py │ ├── extended_iterable_unpacking.py │ ├── ignore_variables.py │ ├── underscore_for_loop.py │ ├── itertools_islice.py │ ├── random_choice.py │ ├── random_sample.py │ ├── fractions_example.py │ ├── join_list.py │ ├── unpack_iterables.py │ ├── check_if_number.py │ ├── division_operators.py │ ├── key_in_max.py │ ├── boolean_operators.py │ ├── list_extend.py │ ├── list_map.py │ ├── zip_example.py │ ├── eval_example.py │ ├── print_error.py │ ├── set_intersection.py │ ├── assert_customize_message.py │ ├── kwargs.py │ ├── collections_defaultdict.py │ ├── os_system.py │ ├── partial_function.py │ ├── copy_method.py │ ├── re_sub_example.py │ ├── zip_function.py │ ├── pass_statement.py │ ├── set_difference.py │ ├── slice_example.py │ ├── filter_example.py │ ├── return_multiple_values_with_dictionary.py │ ├── collections_ordereddict.py │ ├── __call__example.py │ ├── enumerate_example.py │ ├── multiprocessing_example.py │ ├── string_find.py │ ├── namedtuple_example.py │ ├── compare_execution_time.py │ ├── defaultdict_example.py │ ├── __str__and__repr.py │ ├── datetime_timedelta.py │ ├── improve_json_readability.py │ ├── itertools_combinations_example.py │ ├── list_comprehension.py │ ├── staticmethod_example.py │ ├── multiples_of_a_number.py │ ├── collections_counter.py │ ├── 
getattr_example.py │ ├── args_example.py │ ├── abc_example.py │ ├── built_in_functions_speed.py │ ├── classmethod_example.py │ ├── argparse_example.py │ ├── decorator_example.py │ ├── itertools_examples.py │ ├── property_decorator.py │ ├── heapq_example.py │ └── singledispatch_example.py ├── terminal │ ├── check_if_library_is_installed.sh │ ├── sed_command │ │ ├── weather.txt │ │ └── sed_command.sh │ ├── conda_rollback.sh │ ├── environment_variables │ │ ├── main.py │ │ └── .bash_profile │ ├── download_github_file.sh │ ├── gh_cli.sh │ ├── virtualenv_clone.sh │ ├── tee_example.sh │ ├── timeit_command_line.sh │ ├── make_file_immutable.sh │ ├── bash_for_loop.sh │ ├── create_symbolic_link.sh │ └── tr_command.sh ├── cool_tools │ ├── notify_send.sh │ ├── rich_inspect.py │ ├── pydash_chunk.py │ ├── install_forked.sh │ ├── typer_example.py │ ├── tqdm_set_description.py │ ├── ngrok_example.sh │ ├── box_example.py │ ├── strip_interactive_example.py │ ├── snsscrape_example.sh │ ├── heart_rate.py │ ├── mypy_example.py │ ├── yarl_example.py │ ├── knockknock_example.py │ ├── schedule_example.py │ ├── rich_console.py │ ├── icecream_example.py │ ├── loguru_example.py │ ├── icecream_datetime.py │ ├── pygithub_example.py │ ├── pyfiglet_example.py │ ├── getme_forecast.sh │ ├── decorator_module.py │ ├── notion_example.py │ ├── isort_example.py │ └── interrogate_example.py ├── data_science_tools │ ├── dotenv_example │ │ ├── .env │ │ └── example.py │ ├── pytest_repeat.py │ ├── hydra_example │ │ ├── config.yml │ │ └── main.py │ ├── fetch_openml_example.py │ ├── difflib_example.py │ ├── sklearn_rmse.py │ ├── gdown_example.py │ ├── distfit_example.py │ ├── extract_various_data.py │ ├── faker_example.py │ ├── maya_example.py │ ├── pytest_benchmark_example.py │ ├── snoop_example.py │ ├── streamlit_app.py │ ├── extract_holidays.py │ ├── convert_number_to_words.py │ ├── tqdm_example.py │ ├── geopy_example.py │ ├── wordfreq_example.py │ ├── dtreeviz_example.py │ ├── pytest_example.py │ ├── 
dvc_example.sh │ ├── pytest_parametrize.py │ ├── getname_example.py │ ├── graphviz_example.py │ ├── pytest_fixture.py │ ├── datefinder_example.py │ ├── fastai_cont_cat_split.py │ ├── texthero_examples.py │ ├── add_datepart_example.py │ ├── autoscraper_example.py │ ├── docopt_example.py │ ├── datapane_example.py │ ├── textblob_example.py │ ├── causalimpact_example.py │ ├── swifter_example.py │ ├── pipeline_gridsearchcv.py │ ├── spacy_ngram.py │ ├── pandera_example.py │ ├── kedro_example.py │ ├── fastai_df_shrink.py │ ├── compare_2_datasets │ │ └── compare_datasets.ipynb │ ├── mito_example │ │ ├── mito_example.ipynb │ │ └── iris.csv │ ├── newspaper3k.ipynb │ └── folium_example.ipynb ├── numpy │ ├── np_argmax.py │ ├── np_ravel.py │ ├── double_np_argsort.py │ ├── np_where.py │ ├── use_list_to_change_position_of_arrays.py │ ├── np_comparison.py │ ├── np_array_all.py │ ├── any_all.py │ ├── np_linspace.py │ ├── numpy_testing_almost_equal.py │ └── array_to_latex_example.ipynb ├── pandas │ ├── parse_dates.py │ ├── df_explode.py │ ├── pd_dataframe_iterrows.py │ ├── pd_Series_map.py │ ├── pd_series_between.py │ ├── df_columns_str_start_with.py │ ├── pd_dataframe_agg.py │ ├── pd_series_dt.py │ ├── pd_groupby_sample.py │ ├── df_agg.py │ ├── pct_change.py │ ├── exclude_outliers.py │ ├── df_assign.py │ ├── pd_cut.py │ ├── pd_reindex.py │ ├── select_columns_start_with.py │ ├── df_fillna.py │ ├── s.is_in.py │ ├── pd_series_str.py │ ├── df_pivot.py │ ├── set_categories.py │ ├── assert_frame_equal.py │ ├── df_diff.py │ ├── df_datetime_comparison.py │ ├── get_data_in_year_range.py │ ├── df_merge.py │ ├── df_rolling.py │ ├── reduce_memory.py │ ├── dataframe_pipe.py │ ├── select_dtypes.py │ └── highlight_pandas.ipynb └── notebook │ ├── display_math_equations.ipynb │ └── watermark_example.ipynb ├── images ├── cool.png ├── numpy.png ├── panda.png ├── python.png ├── notebook.png ├── command-window.png └── data-science.png ├── .gitignore ├── .pre-commit-config.yaml └── README.md 
/_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-merlot -------------------------------------------------------------------------------- /code_snippets/python/move_files/dir1/example.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /code_snippets/python/move_files/dir2/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /code_snippets/python/pathlib_write/greeting: -------------------------------------------------------------------------------- 1 | Hello! -------------------------------------------------------------------------------- /code_snippets/python/path_parents/test1/test2/test.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /code_snippets/python/pathlib_iterate_files_end_with/data/data1.csv: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /code_snippets/python/pathlib_iterate_files_end_with/data/data2.csv: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /code_snippets/python/pathlib_iterate_files_end_with/data/data3.csv: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /code_snippets/python/pathlib_path/data/processed/data1.csv: -------------------------------------------------------------------------------- 1 | col1,col2 2 | 1,2 
-------------------------------------------------------------------------------- /code_snippets/python/pathlib_path/data/processed/data2.csv: -------------------------------------------------------------------------------- 1 | col1,col2 2 | 1,2 -------------------------------------------------------------------------------- /code_snippets/terminal/check_if_library_is_installed.sh: -------------------------------------------------------------------------------- 1 | python -c 'import pandas' -------------------------------------------------------------------------------- /code_snippets/python/main_example/salulation/__main__.py: -------------------------------------------------------------------------------- 1 | print("Hello") 2 | print("bye") -------------------------------------------------------------------------------- /code_snippets/python/warnings_example.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | warnings.filterwarnings('ignore') -------------------------------------------------------------------------------- /code_snippets/terminal/sed_command/weather.txt: -------------------------------------------------------------------------------- 1 | Today is a sunny day. I want to be outside. 
-------------------------------------------------------------------------------- /code_snippets/cool_tools/notify_send.sh: -------------------------------------------------------------------------------- 1 | python file_to_run.py ; notify-send "Process terminated" -------------------------------------------------------------------------------- /code_snippets/python/any_example.py: -------------------------------------------------------------------------------- 1 | text = 'abcdE' 2 | print(any(c for c in text if c.isupper())) -------------------------------------------------------------------------------- /code_snippets/terminal/conda_rollback.sh: -------------------------------------------------------------------------------- 1 | conda list --revisions 2 | conda install --revision N -------------------------------------------------------------------------------- /code_snippets/data_science_tools/dotenv_example/.env: -------------------------------------------------------------------------------- 1 | USERNAME=my_user_name 2 | PASSWORD=secret_password -------------------------------------------------------------------------------- /code_snippets/python/underscore_large_number.py: -------------------------------------------------------------------------------- 1 | large_num = 1_000_000 2 | print(large_num) 3 | # 1000000 -------------------------------------------------------------------------------- /code_snippets/cool_tools/rich_inspect.py: -------------------------------------------------------------------------------- 1 | from rich import inspect 2 | 3 | print(inspect('hello', methods=True)) -------------------------------------------------------------------------------- /images/cool.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/khuyentran1401/Python-data-science-code-snippet/HEAD/images/cool.png -------------------------------------------------------------------------------- 
/code_snippets/python/move_files/move_files.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | 3 | shutil.move('dir1/example.txt', 'dir2') 4 | 5 | -------------------------------------------------------------------------------- /images/numpy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/khuyentran1401/Python-data-science-code-snippet/HEAD/images/numpy.png -------------------------------------------------------------------------------- /images/panda.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/khuyentran1401/Python-data-science-code-snippet/HEAD/images/panda.png -------------------------------------------------------------------------------- /images/python.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/khuyentran1401/Python-data-science-code-snippet/HEAD/images/python.png -------------------------------------------------------------------------------- /code_snippets/numpy/np_argmax.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | a = np.array([0.2, 0.4, 0.7, 0.3]) 3 | print(np.argmax(a)) 4 | # 2 -------------------------------------------------------------------------------- /code_snippets/python/dates_in_month_as_feature.py: -------------------------------------------------------------------------------- 1 | import calendar 2 | 3 | print(calendar.monthrange(2020, 11)[1]) 4 | # 30 -------------------------------------------------------------------------------- /code_snippets/terminal/environment_variables/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | os.getenv("SECRET_ACCESS_KEY") 4 | # yourkeyhere 
-------------------------------------------------------------------------------- /images/notebook.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/khuyentran1401/Python-data-science-code-snippet/HEAD/images/notebook.png -------------------------------------------------------------------------------- /code_snippets/python/extended_iterable_unpacking.py: -------------------------------------------------------------------------------- 1 | a, *_, b = [1, 2, 3, 4] 2 | print(a) # 1 3 | print(b) # 4 4 | print(_) # [2, 3] -------------------------------------------------------------------------------- /code_snippets/python/ignore_variables.py: -------------------------------------------------------------------------------- 1 | def return_two(): 2 | return 1, 2 3 | 4 | _, var = return_two() 5 | print(var) 6 | # 2 -------------------------------------------------------------------------------- /images/command-window.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/khuyentran1401/Python-data-science-code-snippet/HEAD/images/command-window.png -------------------------------------------------------------------------------- /images/data-science.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/khuyentran1401/Python-data-science-code-snippet/HEAD/images/data-science.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | __pycache__ 3 | workera.py 4 | venv 5 | create_snippet.sh 6 | .pytest_cache 7 | .vscode 8 | .config -------------------------------------------------------------------------------- /code_snippets/pandas/parse_dates.py: -------------------------------------------------------------------------------- 1 | 
import pandas as pd 2 | 3 | df = pd.read_csv('data1.csv', parse_dates=['date_column_1', 'date_column_2']) -------------------------------------------------------------------------------- /code_snippets/cool_tools/pydash_chunk.py: -------------------------------------------------------------------------------- 1 | from pydash import py_ 2 | 3 | a = [1, 2, 3, 4, 5] 4 | print(py_.chunk(a, 2)) # [[1, 2], [3, 4], [5]] 5 | -------------------------------------------------------------------------------- /code_snippets/terminal/environment_variables/.bash_profile: -------------------------------------------------------------------------------- 1 | # Save this in ~/.bash_profile 2 | 3 | export SECRET_ACCESS_KEY=yourkeyhere 4 | 5 | -------------------------------------------------------------------------------- /code_snippets/terminal/download_github_file.sh: -------------------------------------------------------------------------------- 1 | wget https://raw.githubusercontent.com/khuyentran1401/Data-science/master/visualization/dropdown/population.csv 2 | -------------------------------------------------------------------------------- /code_snippets/terminal/gh_cli.sh: -------------------------------------------------------------------------------- 1 | cd your_local_folder 2 | 3 | # Create an empty local git repo 4 | git init 5 | 6 | # Create a new GitHub repo 7 | gh repo create -------------------------------------------------------------------------------- /code_snippets/terminal/virtualenv_clone.sh: -------------------------------------------------------------------------------- 1 | pip install virtualenv-clone 2 | virtualenv-clone old_venv/ new_venv/ 3 | 4 | source new_venv/bin/activate 5 | pip list -------------------------------------------------------------------------------- /code_snippets/pandas/df_explode.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | df = pd.DataFrame({'a': [[1, 2], [4, 5]], 
'b': [11, 13]}) 4 | print(df) 5 | 6 | print(df.explode('a')) -------------------------------------------------------------------------------- /code_snippets/python/underscore_for_loop.py: -------------------------------------------------------------------------------- 1 | for _ in range(5): 2 | print('Hello') 3 | 4 | """ 5 | Hello 6 | Hello 7 | Hello 8 | Hello 9 | Hello 10 | """ -------------------------------------------------------------------------------- /code_snippets/numpy/np_ravel.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | arr = np.array([[1, 2], [3, 41]]) 3 | print(arr) 4 | 5 | print(np.ravel(arr)) 6 | 7 | print(np.ravel(arr, order="F")) -------------------------------------------------------------------------------- /code_snippets/python/itertools_islice.py: -------------------------------------------------------------------------------- 1 | from itertools import islice 2 | 3 | a = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] 4 | new_a = list(islice(a, 1, 7, 2)) 5 | print(new_a) 6 | -------------------------------------------------------------------------------- /code_snippets/python/random_choice.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | to_do_tonight = ['stay at home', 'attend party', 'do exercise'] 4 | 5 | print(random.choice(to_do_tonight)) -------------------------------------------------------------------------------- /code_snippets/python/random_sample.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | random.seed(1) 4 | nums = [1, 2, 3, 4, 5] 5 | random_nums = random.sample(nums, 2) 6 | print(random_nums) 7 | -------------------------------------------------------------------------------- /code_snippets/terminal/tee_example.sh: -------------------------------------------------------------------------------- 1 | # command | tee filename.txt 2 | nvidia-smi | tee 
system_information.txt # write 3 | uname -a | tee -a system_information.txt # append -------------------------------------------------------------------------------- /code_snippets/data_science_tools/pytest_repeat.py: -------------------------------------------------------------------------------- 1 | # pip install pytest-repeat 2 | import pytest 3 | 4 | @pytest.mark.repeat(100) 5 | def test_instance_generator(): 6 | pass -------------------------------------------------------------------------------- /code_snippets/data_science_tools/hydra_example/config.yml: -------------------------------------------------------------------------------- 1 | data: data1 2 | variables: 3 | drop_features: ['iid', 'id', 'idg', 'wave'] 4 | categorical_vars: ['undergra', 'zipcode'] -------------------------------------------------------------------------------- /code_snippets/python/fractions_example.py: -------------------------------------------------------------------------------- 1 | from fractions import Fraction 2 | 3 | print(2 / 3 + 1) 4 | # 1.6666666666666665 5 | 6 | print(Fraction(2 / 3 + 1).limit_denominator()) 7 | # 5/3 -------------------------------------------------------------------------------- /code_snippets/cool_tools/install_forked.sh: -------------------------------------------------------------------------------- 1 | # pip install -e git+https://github.com/username/package.git#egg=package 2 | pip install -e git+https://github.com/khuyentran1401/numpy.git#egg=numpy -------------------------------------------------------------------------------- /code_snippets/numpy/double_np_argsort.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | a = np.array([2, 1, 4, 7, 3]) 4 | 5 | # Get rank of values in an array 6 | print(a.argsort().argsort()) 7 | # [1 0 3 4 2] -------------------------------------------------------------------------------- /code_snippets/python/join_list.py: 
-------------------------------------------------------------------------------- 1 | fruits = ['apples', 'oranges', 'grapes'] 2 | 3 | fruits_str = ', '.join(fruits) 4 | 5 | print(f"Today, I need to get some {fruits_str} in the grocery store") -------------------------------------------------------------------------------- /code_snippets/python/pathlib_write/path_test.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | file = Path('data') 4 | file.open('w').write('Hello!') 5 | 6 | new_p = file.rename(Path("greeting")) 7 | 8 | -------------------------------------------------------------------------------- /code_snippets/python/unpack_iterables.py: -------------------------------------------------------------------------------- 1 | nested_arr = [[1,2,3], ['a','b'], 4] 2 | num_arr, char_arr, num = nested_arr 3 | 4 | print(num_arr) 5 | # [1, 2, 3] 6 | 7 | print(char_arr) 8 | # ['a', 'b'] -------------------------------------------------------------------------------- /code_snippets/python/check_if_number.py: -------------------------------------------------------------------------------- 1 | from numbers import Number 2 | 3 | a = 2 4 | b = 0.4 5 | 6 | print(isinstance(a, Number)) 7 | # True 8 | 9 | print(isinstance(b, Number)) 10 | # True -------------------------------------------------------------------------------- /code_snippets/python/division_operators.py: -------------------------------------------------------------------------------- 1 | # Get a division 2 | print(5 / 2) # 2.5 3 | 4 | # Get remainder of a division 5 | print(5 % 2) # 1 6 | 7 | # Get floor division 8 | print(5 // 2) # 2 9 | -------------------------------------------------------------------------------- /code_snippets/data_science_tools/dotenv_example/example.py: -------------------------------------------------------------------------------- 1 | from dotenv import load_dotenv 2 | import os 3 | 4 | load_dotenv() 5 | PASSWORD = 
os.getenv('PASSWORD') 6 | print(PASSWORD) 7 | # secret_password -------------------------------------------------------------------------------- /code_snippets/data_science_tools/fetch_openml_example.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets import fetch_openml 2 | 3 | monk = fetch_openml(name='monks-problems-2', as_frame=True) 4 | print(monk['data'].head(10)) 5 | -------------------------------------------------------------------------------- /code_snippets/pandas/pd_dataframe_iterrows.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) 4 | for idx, row in df.iterrows(): 5 | print(f"a: {row['a']}, b: {row['b']}") -------------------------------------------------------------------------------- /code_snippets/data_science_tools/difflib_example.py: -------------------------------------------------------------------------------- 1 | from difflib import SequenceMatcher 2 | 3 | text1 = 'I am Khuyen' 4 | text2 = 'I am Khuen' 5 | print(SequenceMatcher(a=text1, b=text2).ratio()) 6 | # 0.9523809523809523 -------------------------------------------------------------------------------- /code_snippets/pandas/pd_Series_map.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | s = pd.Series(["a", "b", "c"]) 4 | 5 | print(s.map({"a": 1, "b": 2, "c": 3})) 6 | """ 7 | 0 1 8 | 1 2 9 | 2 3 10 | dtype: int64 11 | """ 12 | -------------------------------------------------------------------------------- /code_snippets/python/key_in_max.py: -------------------------------------------------------------------------------- 1 | birth_year = {"Ben": 1997, "Alex": 2000, "Oliver": 1995} 2 | 3 | print(max(birth_year)) 4 | # Oliver 5 | 6 | max_val = max(birth_year, key=lambda k: birth_year[k]) 7 | print(max_val) 8 | # Alex 
-------------------------------------------------------------------------------- /code_snippets/python/pathlib_path/main.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import pandas as pd 3 | 4 | DATA_DIR = Path('data', 'processed') 5 | 6 | df1 = pd.read_csv(DATA_DIR / 'data1.csv') 7 | df2 = pd.read_csv(DATA_DIR / 'data2.csv') -------------------------------------------------------------------------------- /code_snippets/pandas/pd_series_between.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | s = pd.Series([5, 2, 15, 13, 6, 10]) 4 | 5 | print(s[s.between(0, 10)]) 6 | """ 7 | 0 5 8 | 1 2 9 | 4 6 10 | 5 10 11 | dtype: int64 12 | """ -------------------------------------------------------------------------------- /code_snippets/terminal/timeit_command_line.sh: -------------------------------------------------------------------------------- 1 | python -m timeit "arr = [] 2 | for i in range(100): 3 | arr.append(i)" 4 | 5 | python -m timeit "arr = [i for i in range(100)]" 6 | 7 | python -m timeit "arr = list(range(100))" 8 | 9 | -------------------------------------------------------------------------------- /code_snippets/python/boolean_operators.py: -------------------------------------------------------------------------------- 1 | movie_available = True 2 | have_money = False 3 | 4 | get_excited = movie_available | have_money 5 | print(get_excited) 6 | 7 | 8 | buy = movie_available & have_money 9 | print(buy) 10 | -------------------------------------------------------------------------------- /code_snippets/terminal/make_file_immutable.sh: -------------------------------------------------------------------------------- 1 | touch important_file.txt 2 | 3 | # Make the file immutable 4 | sudo chattr +i important_file.txt 5 | 6 | # Check the file attributes 7 | lsattr important_file.txt 8 | 9 | rm important_file.txt 
-------------------------------------------------------------------------------- /code_snippets/cool_tools/typer_example.py: -------------------------------------------------------------------------------- 1 | import typer 2 | 3 | def process_data(data: str, version: int): 4 | print(f'Processing {data},' 5 | f'version {version}') 6 | 7 | if __name__ == '__main__': 8 | typer.run(process_data) -------------------------------------------------------------------------------- /code_snippets/python/list_extend.py: -------------------------------------------------------------------------------- 1 | # Add a list to a list 2 | a = [1, 2, 3, 4] 3 | a.append([5, 6]) 4 | print(a) # [1, 2, 3, 4, [5, 6]] 5 | 6 | 7 | a = [1, 2, 3, 4] 8 | a.extend([5, 6]) 9 | 10 | print(a) # [1, 2, 3, 4, 5, 6] 11 | -------------------------------------------------------------------------------- /code_snippets/python/list_map.py: -------------------------------------------------------------------------------- 1 | nums = [1, 2, 3] 2 | print(list(map(str, nums))) # ['1', '2', '3'] 3 | 4 | 5 | def multiply_by_two(num: float): 6 | return num * 2 7 | 8 | 9 | print(list(map(multiply_by_two, nums))) # [2, 4, 6] 10 | -------------------------------------------------------------------------------- /code_snippets/python/zip_example.py: -------------------------------------------------------------------------------- 1 | nums = [1, 2, 3, 4] 2 | string = "abcd" 3 | combinations = list(zip(nums, string)) 4 | for comb in combinations: 5 | print(comb) 6 | """ 7 | (1, 'a') 8 | (2, 'b') 9 | (3, 'c') 10 | (4, 'd') 11 | """ 12 | -------------------------------------------------------------------------------- /code_snippets/terminal/bash_for_loop.sh: -------------------------------------------------------------------------------- 1 | datas=(1 3 5) 2 | for data in ${datas[@]} 3 | do 4 | echo Processing data $data 5 | # python process.py data=$data 6 | done 7 | 8 | # Processing data 1 9 | # Processing data 3 10 | # 
Processing data 5 -------------------------------------------------------------------------------- /code_snippets/cool_tools/tqdm_set_description.py: -------------------------------------------------------------------------------- 1 | from time import sleep 2 | 3 | from tqdm import tqdm 4 | 5 | fruits = tqdm(["apple", "orange", "grape"]) 6 | for fruit in fruits: 7 | sleep(0.3) 8 | fruits.set_description(f"Picking {fruit}") 9 | -------------------------------------------------------------------------------- /code_snippets/data_science_tools/sklearn_rmse.py: -------------------------------------------------------------------------------- 1 | from sklearn.metrics import mean_squared_error 2 | 3 | y_actual = [1, 2, 3] 4 | y_predicted = [1.5, 2.5, 3.5] 5 | rmse = mean_squared_error(y_actual, y_predicted, squared=False) 6 | print(rmse) # 0.5 7 | -------------------------------------------------------------------------------- /code_snippets/python/eval_example.py: -------------------------------------------------------------------------------- 1 | variable_1 = 'a' 2 | variable_2 = 'b' 3 | variable_3 = 'c' 4 | 5 | print(eval('variable_1')) 6 | # a 7 | 8 | variables = [eval(f'variable_{i}') for i in range(1, 4)] 9 | print(variables) 10 | # ['a', 'b', 'c'] -------------------------------------------------------------------------------- /code_snippets/python/print_error.py: -------------------------------------------------------------------------------- 1 | arr = {'a': [1, 2], 'b': 1} 2 | for key, val in arr.items(): 3 | try: 4 | print(val[0]) 5 | except Exception as e: 6 | print(e) 7 | """ 8 | 1 9 | 'int' object is not subscriptable 10 | """ -------------------------------------------------------------------------------- /code_snippets/python/path_parents/path_parents.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | file = 'test1/test2/test.txt' 4 | print(Path(file).parents[0]) 5 | 
print(Path(file).parents[1]) 6 | print(Path(file).parents[2]) 7 | print(Path(file).parents[2].resolve()) 8 | -------------------------------------------------------------------------------- /code_snippets/python/set_intersection.py: -------------------------------------------------------------------------------- 1 | requirement1 = ['pandas', 'numpy', 'statsmodel'] 2 | requirement2 = ['numpy', 'statsmodel', 'sympy', 'matplotlib'] 3 | 4 | intersection = set.intersection(set(requirement1), set(requirement2)) 5 | print(list(intersection)) -------------------------------------------------------------------------------- /code_snippets/python/assert_customize_message.py: -------------------------------------------------------------------------------- 1 | def division(num1: int, num2: int): 2 | assert num2 != 0, "num2 must be different from 0" 3 | return num1 / num2 4 | 5 | 6 | division(2, 0) 7 | """ 8 | AssertionError: num2 must be different from 0 9 | """ -------------------------------------------------------------------------------- /code_snippets/numpy/np_where.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | arr = np.array([[1, 4, 10, 15], [2, 3, 8, 9]]) 4 | 5 | # Multiply values that are less than 5 by 2 6 | print(np.where(arr < 5, arr * 2, arr)) 7 | """ 8 | [[ 2 8 10 15] 9 | [ 4 6 8 9]] 10 | """ 11 | -------------------------------------------------------------------------------- /code_snippets/python/kwargs.py: -------------------------------------------------------------------------------- 1 | parameters = {'a': 1, 'b': 2} 2 | def example(c, **kwargs): 3 | print(kwargs) 4 | for val in kwargs.values(): 5 | print(c + val) 6 | 7 | example(c=3, **parameters) 8 | """ 9 | {'a': 1, 'b': 2} 10 | 4 11 | 5 12 | """ -------------------------------------------------------------------------------- /code_snippets/numpy/use_list_to_change_position_of_arrays.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | arr = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) 4 | print("Old array\n", arr) 5 | 6 | new_row_position = [1, 2, 0] 7 | new_arr = arr[new_row_position, : ] 8 | print("New array\n", new_arr) 9 | -------------------------------------------------------------------------------- /code_snippets/python/collections_defaultdict.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | 3 | classes = defaultdict(lambda: 'Outside') 4 | classes['Math'] = 'B23' 5 | classes['Physics'] = 'D24' 6 | print(classes['Math']) 7 | # B23 8 | 9 | print(classes['English']) 10 | # Outside -------------------------------------------------------------------------------- /code_snippets/python/os_system.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | os.system("echo Files in the current directory are:") 4 | os.system("ls") 5 | 6 | """ 7 | Files in the current directory are: 8 | cool_tools create_snippet.sh data_science_tools numpy pandas python terminal 9 | """ 10 | -------------------------------------------------------------------------------- /code_snippets/python/partial_function.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | 4 | def linear_func(x, a, b): 5 | return a * x + b 6 | 7 | 8 | linear_func_partial = partial(linear_func, a=2, b=3) 9 | print(linear_func_partial(2)) 10 | print(linear_func_partial(4)) 11 | -------------------------------------------------------------------------------- /code_snippets/cool_tools/ngrok_example.sh: -------------------------------------------------------------------------------- 1 | # Generate config 2 | 3 | jupyter notebook --generate-config 4 | # Allow remote access 5 | 6 | echo "c.NotebookApp.allow_remote_access = True" >> 
~/.jupyter/jupyter_notebook_config.py 7 | 8 | jupyter notebook 9 | 10 | ngrok http 8888 11 | -------------------------------------------------------------------------------- /code_snippets/python/copy_method.py: -------------------------------------------------------------------------------- 1 | # Instead of this 2 | l1 = [1, 2, 3] 3 | l2 = l1 4 | l2.append(4) 5 | print(l2) 6 | # [1, 2, 3, 4] 7 | 8 | print(l1) 9 | # [1, 2, 3, 4] 10 | 11 | # Do this 12 | l1 = [1, 2, 3] 13 | l2 = l1.copy() 14 | l2.append(4) 15 | print(l1) 16 | # [1, 2, 3] -------------------------------------------------------------------------------- /code_snippets/python/re_sub_example.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | text = 'Today is 3/7/2021' 4 | match_pattern = r'(\d+)/(\d+)/(\d+)' 5 | 6 | print(re.sub(match_pattern, 'Sunday', text)) 7 | # Today is Sunday 8 | 9 | print(re.sub(match_pattern, r'\3-\1-\2', text)) 10 | # Today is 2021-3-7 -------------------------------------------------------------------------------- /code_snippets/cool_tools/box_example.py: -------------------------------------------------------------------------------- 1 | from box import Box 2 | 3 | food_box = Box({"food": {"fruit": {"name": "apple", "flavor": "sweet"}}}) 4 | 5 | print(food_box) 6 | # {'food': {'fruit': {'name': 'apple', 'flavor': 'sweet'}}} 7 | 8 | print(food_box.food.fruit.name) 9 | # apple 10 | -------------------------------------------------------------------------------- /code_snippets/pandas/df_columns_str_start_with.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | df = pd.DataFrame({'pricel': [1, 2, 3], 4 | 'price2': [2, 3, 4], 5 | 'year': [2020, 2021, 2021]}) 6 | 7 | mask = df.columns.str.startswith('price') 8 | print(df.loc[:, mask]) 9 | -------------------------------------------------------------------------------- 
/code_snippets/cool_tools/strip_interactive_example.py: -------------------------------------------------------------------------------- 1 | 2 | from strip_interactive import run_interactive 3 | 4 | code = """ 5 | >>> import numpy as np 6 | >>> print(np.array([1,2,3])) 7 | [1 2 3] 8 | >>> print(np.array([4,5,6])) 9 | [4 5 6] 10 | """ 11 | 12 | run_interactive(code) 13 | 14 | -------------------------------------------------------------------------------- /code_snippets/pandas/pd_dataframe_agg.py: -------------------------------------------------------------------------------- 1 | 2 | from collections import Counter 3 | import pandas as pd 4 | 5 | 6 | def count_two(nums: list): 7 | return Counter(nums)[2] 8 | 9 | 10 | df = pd.DataFrame({"coll": [1, 3, 5], "col2": [2, 4, 6]}) 11 | print(df.agg(["sum", count_two])) 12 | -------------------------------------------------------------------------------- /code_snippets/pandas/pd_series_dt.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | df = pd.DataFrame({'date': ['2021/05/13 15:00', '2022-6-20 14:00'], 4 | 'values': [1, 3]}) 5 | 6 | df['date'] = pd.to_datetime(df['date']) 7 | 8 | print(df['date'].dt.year) 9 | 10 | print(df['date'].dt.time) 11 | 12 | -------------------------------------------------------------------------------- /code_snippets/cool_tools/snsscrape_example.sh: -------------------------------------------------------------------------------- 1 | # Scrape all tweets from @KhuyenTran16 2 | snscrape twitter-user KhuyenTran16 3 | 4 | # Save outputs 5 | snscrape twitter-user KhuyenTran16 >> khuyen_tweets 6 | 7 | # Scrape 100 tweets with hashtag python 8 | snscrape --max-results 100 twitter-hashtag python -------------------------------------------------------------------------------- /code_snippets/numpy/np_comparison.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | a = 
np.array([1, 2, 3]) 4 | b = np.array([4, 1, 2]) 5 | 6 | print(a < 2) 7 | """ 8 | [ True False False] 9 | """ 10 | 11 | print(a < b) 12 | """ 13 | [ True False False] 14 | """ 15 | 16 | print(a[a < b]) 17 | """ 18 | [1] 19 | """ -------------------------------------------------------------------------------- /code_snippets/python/zip_function.py: -------------------------------------------------------------------------------- 1 | nums = [1, 2, 3, 4] 2 | chars = ['a', 'b', 'c', 'd'] 3 | 4 | comb = list(zip(nums, chars)) 5 | print(comb) 6 | # [(1, 'a'), (2, 'b'), (3, 'c'), (4, 'd')] 7 | 8 | nums_2, chars_2 = zip(*comb) 9 | print(nums_2, chars_2) 10 | # (1, 2, 3, 4) ('a', 'b', 'c', 'd') -------------------------------------------------------------------------------- /code_snippets/data_science_tools/gdown_example.py: -------------------------------------------------------------------------------- 1 | # pip install gdown 2 | import gdown 3 | 4 | # Format of url: https://drive.google.com/uc?id=YOURFILEID 5 | url = 'https://drive.google.com/uc?id=1jI1cmxqnwsmC-vbl8dNY6b4aNBtBbKy3' 6 | output = 'Twitter.zip' 7 | 8 | gdown.download(url, output, quiet=False) 9 | -------------------------------------------------------------------------------- /code_snippets/python/pass_statement.py: -------------------------------------------------------------------------------- 1 | def say_hello(): 2 | pass 3 | 4 | def ask_to_sign_in(): 5 | pass 6 | 7 | def main(is_user: bool): 8 | if is_user: 9 | say_hello() 10 | else: 11 | ask_to_sign_in() 12 | 13 | if __name__ == '__main__': 14 | main(is_user=True) -------------------------------------------------------------------------------- /code_snippets/numpy/np_array_all.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | a = np.array([[1, 2, 1], [2, 2, 5]]) 4 | print(a) 5 | 6 | # Find if all elements are less than 3 in each column 7 | print((a < 3).all(axis=0)) 8 | 9 | # Find if all 
elements are less than 3 in each row 10 | print((a < 3).all(axis=1)) 11 | -------------------------------------------------------------------------------- /code_snippets/python/set_difference.py: -------------------------------------------------------------------------------- 1 | a = [1, 2, 3, 4] 2 | b = [1, 3, 4, 5, 6] 3 | 4 | # Find elements in a but not in b 5 | diff = set(a).difference(set(b)) 6 | print(list(diff)) # [2] 7 | 8 | # Find elements in b but not in a 9 | diff = set(b).difference(set(a)) 10 | print(list(diff)) # [5, 6] 11 | -------------------------------------------------------------------------------- /code_snippets/pandas/pd_groupby_sample.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | df = pd.DataFrame({"col1": ["a", "a", "b", "c", "c", "d"], "col2": [4, 5, 6, 7, 8, 9]}) 4 | print(df.groupby("col1").sample(n=1)) 5 | 6 | """ 7 | col1 col2 8 | 0 a 4 9 | 2 b 6 10 | 4 c 8 11 | 5 d 9 12 | """ 13 | -------------------------------------------------------------------------------- /code_snippets/terminal/create_symbolic_link.sh: -------------------------------------------------------------------------------- 1 | information 2 | # command not found: information 3 | 4 | which htop 5 | # /usr/bin/htop 6 | 7 | # Create symbolic link between 2 files 8 | # ln -s [EXISTING_FILE] [NEW_FILE] 9 | sudo ln -s /usr/bin/htop /usr/bin/information 10 | 11 | information 12 | # Works now!
-------------------------------------------------------------------------------- /code_snippets/pandas/df_agg.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | df = pd.DataFrame({"a": [1, 2, 3, 4], "b": [2, 3, 4, 5]}) 4 | 5 | print(df.agg({"a": ["sum", "mean"], "b": ["min", "max"]})) 6 | 7 | """ 8 | a b 9 | sum 10.0 NaN 10 | mean 2.5 NaN 11 | min NaN 2.0 12 | max NaN 5.0 13 | """ 14 | -------------------------------------------------------------------------------- /code_snippets/python/slice_example.py: -------------------------------------------------------------------------------- 1 | data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] 2 | 3 | # Instead of this 4 | some_sum = sum(data[:8]) * sum(data[8:]) 5 | 6 | # do this 7 | JANUARY = slice(0, 8) 8 | FEBRUARY = slice(8, len(data)) 9 | some_sum = sum(data[JANUARY] * sum(data[FEBRUARY])) 10 | print(some_sum) 11 | # 684 -------------------------------------------------------------------------------- /code_snippets/terminal/tr_command.sh: -------------------------------------------------------------------------------- 1 | echo $PATH 2 | # /home/user/Python-data-science-code-snippet/venv/bin:/home/khuyentran/anaconda3/bin:/home/user/.poetry/bin 3 | 4 | echo $PATH | tr ":" "\n" 5 | """ 6 | /home/user/Python-data-science-code-snippet/venv/bin 7 | /home/khuyentran/anaconda3/bin 8 | /home/user/.poetry/bin 9 | """ -------------------------------------------------------------------------------- /code_snippets/data_science_tools/distfit_example.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from distfit import distfit 3 | 4 | X = np.random.normal(0, 3, 1000) 5 | 6 | # Initialize model 7 | dist = distfit() 8 | 9 | # Find best theoretical distribution for empirical data X 10 | distribution = dist.fit_transform(X) 11 | dist.plot() 12 | 
-------------------------------------------------------------------------------- /code_snippets/data_science_tools/extract_various_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import datetime 3 | import pandas_datareader.data as web 4 | 5 | df = web.DataReader('AD', 'av-daily', start=datetime(2008, 1, 1), 6 | end = datetime(2018, 2, 28), 7 | api_key=os.getenv('ALPHAVANTAGE_API_KEY')) -------------------------------------------------------------------------------- /code_snippets/python/filter_example.py: -------------------------------------------------------------------------------- 1 | def get_fruit(val: str): 2 | fruits = ['apple', 'orange', 'grape'] 3 | if val in fruits: 4 | return True 5 | else: 6 | return False 7 | 8 | items = ['chair', 'apple', 'water', 'table', 'orange'] 9 | fruits = filter(get_fruit, items) 10 | print(list(fruits)) -------------------------------------------------------------------------------- /code_snippets/cool_tools/heart_rate.py: -------------------------------------------------------------------------------- 1 | import heartrate 2 | heartrate.trace(browser=True) 3 | 4 | def factorial(x): 5 | if x == 1: 6 | return 1 7 | else: 8 | return (x * factorial(x-1)) 9 | 10 | 11 | if __name__ == "__main__": 12 | num = 5 13 | print(f"The factorial of {num} is {factorial(num)}") -------------------------------------------------------------------------------- /code_snippets/data_science_tools/faker_example.py: -------------------------------------------------------------------------------- 1 | # pip install faker 2 | from faker import Faker 3 | 4 | fake = Faker() 5 | 6 | print(fake.color_name()) 7 | 8 | print(fake.name()) 9 | 10 | print(fake.address()) 11 | 12 | print(fake.date_of_birth(minimum_age=22)) 13 | 14 | print(fake.city()) 15 | 16 | print(fake.job()) 17 | -------------------------------------------------------------------------------- 
/code_snippets/data_science_tools/maya_example.py: -------------------------------------------------------------------------------- 1 | import maya 2 | 3 | # Automatically parse datetime string 4 | string = '2016-12-16 18:23:45.423992+00:00' 5 | print(maya.parse(string).datetime()) 6 | # 2016-12-16 18:23:45.423992+00:00 7 | 8 | print(maya.parse(string).datetime(to_timezone='US/Central')) 9 | # 2016-12-16 12:23:45.423992-06:00 -------------------------------------------------------------------------------- /code_snippets/python/return_multiple_values_with_dictionary.py: -------------------------------------------------------------------------------- 1 | def return_many_values(): 2 | a = 1 3 | b = 2 4 | c = 3 5 | d = 4 6 | # Instead of return a, b, c, d 7 | return {'a': a, 'b': b, 'c': c, 'd': d} 8 | 9 | values = return_many_values() 10 | print(values['a']) 11 | # 1 12 | print(values['b']) 13 | # 2 -------------------------------------------------------------------------------- /code_snippets/pandas/pct_change.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | df = pd.DataFrame({'a': [20, 35, 10], 'b': [1, 2, 3]}) 4 | print(df) 5 | """ 6 | a b 7 | 0 20 1 8 | 1 35 2 9 | 2 10 3 10 | """ 11 | 12 | print(df.a.pct_change()) 13 | """ 14 | 0 NaN 15 | 1 0.750000 16 | 2 -0.714286 17 | Name: a, dtype: float64 18 | """ -------------------------------------------------------------------------------- /code_snippets/python/collections_ordereddict.py: -------------------------------------------------------------------------------- 1 | import collections 2 | 3 | tasks = collections.OrderedDict(laundry=0.5, shopping=2, clean=2) 4 | tasks['movie'] = 2 5 | print(tasks) 6 | # OrderedDict([('laundry', 0.5), ('shopping', 2), ('clean', 2), ('movie', 2)]) 7 | 8 | print(tasks.keys()) 9 | # odict_keys(['laundry', 'shopping', 'clean', 'movie']) -------------------------------------------------------------------------------- 
/code_snippets/python/pathlib_iterate_files_end_with/main.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | directory_name = 'data' 4 | 5 | # Loop files in a directory 6 | pathlist = Path(directory_name).rglob('*.csv') 7 | for path in pathlist: 8 | print(str(path)) 9 | 10 | """ 11 | data/data3.csv 12 | data/data1.csv 13 | data/data2.csv 14 | """ -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/ambv/black 3 | rev: 20.8b1 4 | hooks: 5 | - id: black 6 | - repo: https://gitlab.com/pycqa/flake8 7 | rev: 3.8.4 8 | hooks: 9 | - id: flake8 10 | - repo: https://github.com/timothycrosley/isort 11 | rev: 5.7.0 12 | hooks: 13 | - id: isort 14 | -------------------------------------------------------------------------------- /code_snippets/data_science_tools/pytest_benchmark_example.py: -------------------------------------------------------------------------------- 1 | def list_comprehension(len_list=5): 2 | return [i for i in range(len_list)] 3 | 4 | 5 | def test_concat(benchmark): 6 | res = benchmark(list_comprehension) 7 | assert res == [0, 1, 2, 3, 4] 8 | 9 | 10 | """On your terminal 11 | pytest pytest_benchmark.py 12 | """ 13 | -------------------------------------------------------------------------------- /code_snippets/data_science_tools/snoop_example.py: -------------------------------------------------------------------------------- 1 | # pip install snoop 2 | import snoop 3 | 4 | @snoop 5 | def factorial(x: int): 6 | if x == 1: 7 | return 1 8 | else: 9 | return (x * factorial(x-1)) 10 | 11 | if __name__ == '__main__': 12 | num = 2 13 | print(f'The factorial of {num} is {factorial(num)}') 14 | -------------------------------------------------------------------------------- /code_snippets/cool_tools/mypy_example.py: 
-------------------------------------------------------------------------------- 1 | from typing import List, Union 2 | 3 | def get_name_price(fruits: list) -> Union[list, tuple]: 4 | return zip(*fruits) 5 | 6 | fruits = [('apple', 2), ('orange', 3), ('grape', 2)] 7 | names, prices = get_name_price(fruits) 8 | print(names) # ('apple', 'orange', 'grape') 9 | print(prices) # (2, 3, 2) 10 | -------------------------------------------------------------------------------- /code_snippets/numpy/any_all.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | a = np.array([[1, 2, 1], [2, 2, 5]]) 4 | 5 | # get the rows whose all values are fewer than 3 6 | mask_all = (a<3).all(axis=1) 7 | print(a[mask_all]) 8 | """ 9 | [[1 2 1]] 10 | """ 11 | 12 | mask_any = (a<3).any(axis=1) 13 | print(a[mask_any]) 14 | """ 15 | [[1 2 1] 16 | [2 2 5]] 17 | """ -------------------------------------------------------------------------------- /code_snippets/numpy/np_linspace.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | x = np.linspace(2, 4, num=10) 5 | print(x) 6 | """ 7 | [2. 2.22222222 2.44444444 2.66666667 2.88888889 3.11111111 8 | 3.33333333 3.55555556 3.77777778 4. 
] 9 | """ 10 | 11 | y = np.arange(10) 12 | 13 | plt.plot(x, y) 14 | plt.show() 15 | -------------------------------------------------------------------------------- /code_snippets/cool_tools/yarl_example.py: -------------------------------------------------------------------------------- 1 | from yarl import URL 2 | 3 | url = URL('https://github.com') 4 | new_url = url/ "search" % 'q=data+science' 5 | print(new_url) # https://github.com/search?q=data+science 6 | 7 | print(new_url.host) # github.com 8 | print(new_url.path) # /search 9 | print(new_url.query_string) # q=data science 10 | 11 | 12 | -------------------------------------------------------------------------------- /code_snippets/pandas/exclude_outliers.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | data = {"col0": [9, -3, 0, -1, 5]} 4 | df = pd.DataFrame(data) 5 | 6 | lower = df.col0.quantile(0.05) 7 | upper = df.col0.quantile(0.95) 8 | 9 | print(df.clip(lower=lower, upper=upper)) 10 | """ 11 | col0 12 | 0 8.2 13 | 1 -2.6 14 | 2 0.0 15 | 3 -1.0 16 | 4 5.0 17 | """ 18 | -------------------------------------------------------------------------------- /code_snippets/python/__call__example.py: -------------------------------------------------------------------------------- 1 | class DataLoader: 2 | 3 | def __init__(self, data_dir: str): 4 | self.data_dir = data_dir 5 | print("Instance is created") 6 | 7 | def __call__(self): 8 | print("Instance is called") 9 | 10 | data_loader = DataLoader('my_data_dir') 11 | # Instance is created 12 | 13 | data_loader() 14 | # Instance is called -------------------------------------------------------------------------------- /code_snippets/python/enumerate_example.py: -------------------------------------------------------------------------------- 1 | arr = ['a', 'b', 'c', 'd', 'e'] 2 | 3 | # Instead of this 4 | for i in range(len(arr)): 5 | print(i, arr[i]) 6 | """ 7 | 0 a 8 | 1 b 9 | 2 c 10 | 3 d 11
| 4 e 12 | """ 13 | 14 | # Use this 15 | for i, val in enumerate(arr): 16 | print(i, val) 17 | """ 18 | 0 a 19 | 1 b 20 | 2 c 21 | 3 d 22 | 4 e 23 | """ -------------------------------------------------------------------------------- /code_snippets/python/multiprocessing_example.py: -------------------------------------------------------------------------------- 1 | from joblib import Parallel, delayed 2 | import multiprocessing 3 | 4 | def add_three(num: int): 5 | return num + 3 6 | 7 | num_cores = multiprocessing.cpu_count() 8 | results = Parallel(n_jobs=num_cores)(delayed(add_three)(i) for i in range(10)) 9 | print(results) 10 | # [3, 4, 5, 6, 7, 8, 9, 10, 11, 12] -------------------------------------------------------------------------------- /code_snippets/python/string_find.py: -------------------------------------------------------------------------------- 1 | sentence = "Today is Saturaday" 2 | 3 | # Find the index of first occurrence of the substring 4 | print(sentence.find("day") ) 5 | # 2 6 | 7 | # Start searching for the substring at index 3 8 | print(sentence.find("day", 3)) 9 | # 15 10 | 11 | print(sentence.find("nice")) 12 | # -1 13 | # No substring is found 14 | -------------------------------------------------------------------------------- /code_snippets/cool_tools/knockknock_example.py: -------------------------------------------------------------------------------- 1 | from knockknock import email_sender 2 | 3 | @email_sender(recipient_emails=['', ''], 4 | sender_email="") 5 | def train_your_nicest_model(your_nicest_parameters): 6 | import time 7 | time.sleep(10_000) 8 | return {'loss': 0.9} 9 | -------------------------------------------------------------------------------- /code_snippets/data_science_tools/streamlit_app.py: -------------------------------------------------------------------------------- 1 | # pip install spacy-streamlit 2 | # python -m spacy download en_core_web_sm 3 | 4 | 5 | import spacy_streamlit 6 | 7 | models =
['en_core_web_sm'] 8 | text = "Today is a beautiful day" 9 | spacy_streamlit.visualize(models, text) 10 | 11 | """On your terminal, type: 12 | streamlit run streamlit_app.py 13 | """ -------------------------------------------------------------------------------- /code_snippets/pandas/df_assign.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) 4 | 5 | df = (df.assign(col3=lambda x: x.col1 * 100 + x.col2) 6 | .assign(col4=lambda x: x.col2 * x.col3) 7 | ) 8 | print(df) 9 | """ 10 | col1 col2 col3 col4 11 | 0 1 3 103 309 12 | 1 2 4 204 816 13 | """ -------------------------------------------------------------------------------- /code_snippets/data_science_tools/extract_holidays.py: -------------------------------------------------------------------------------- 1 | # pip install holidays 2 | from datetime import date 3 | import holidays 4 | 5 | us_holidays = holidays.UnitedStates() 6 | 7 | print('2014-07-04' in us_holidays) 8 | # True 9 | 10 | print(us_holidays.get('2014-7-4')) 11 | # Independence Day 12 | 13 | print(us_holidays.get('2014/7/4')) 14 | # Independence Day 15 | -------------------------------------------------------------------------------- /code_snippets/pandas/pd_cut.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | df = pd.DataFrame({'a': [1, 3, 7, 11, 14, 17]}) 4 | 5 | bins = [0, 5, 10, 15, 20] 6 | df['binned'] = pd.cut(df['a'], bins=bins) 7 | 8 | print(df) 9 | """ 10 | a binned 11 | 0 1 (0, 5] 12 | 1 3 (0, 5] 13 | 2 7 (5, 10] 14 | 3 11 (10, 15] 15 | 4 14 (10, 15] 16 | 5 17 (15, 20] 17 | """ -------------------------------------------------------------------------------- /code_snippets/terminal/sed_command/sed_command.sh: -------------------------------------------------------------------------------- 1 | cat weather.txt 2 | # Today is a sunny day. 
I want to be outside. 3 | 4 | sed 's/sunny/rainy/' weather.txt 5 | # Today is a rainy day. I want to be outside. 6 | 7 | sed -i 's/sunny/rainy/' weather.txt 8 | sed -i 's/outside/inside/' weather.txt 9 | 10 | cat weather.txt 11 | # Today is a rainy day. I want to be inside -------------------------------------------------------------------------------- /code_snippets/data_science_tools/convert_number_to_words.py: -------------------------------------------------------------------------------- 1 | # pip install num2words 2 | from num2words import num2words 3 | 4 | print(num2words(105)) 5 | # one hundred and five 6 | 7 | print(num2words(105, to='ordinal')) 8 | # one hundred and fifth 9 | 10 | print(num2words(105, lang='vi')) 11 | # một trăm lẻ năm 12 | 13 | print(num2words(105, lang='es')) 14 | # ciento cinco -------------------------------------------------------------------------------- /code_snippets/data_science_tools/hydra_example/main.py: -------------------------------------------------------------------------------- 1 | import hydra 2 | 3 | @hydra.main(config_name='config.yml') 4 | def main(config): 5 | print(f'Process {config.data}') 6 | print(f'Drop features: {config.variables.drop_features}') 7 | 8 | if __name__ == '__main__': 9 | main() 10 | 11 | """ 12 | Process data1 13 | Drop features: ['iid', 'id', 'idg', 'wave'] 14 | """ -------------------------------------------------------------------------------- /code_snippets/data_science_tools/tqdm_example.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from tqdm import tqdm 3 | import time 4 | 5 | df = pd.DataFrame({'a': [1, 2, 3, 4, 5], 'b': [2, 3, 4, 5, 6]}) 6 | 7 | tqdm.pandas() 8 | def func(row): 9 | time.sleep(1) 10 | return row + 1 11 | 12 | df['a'].progress_apply(func) 13 | """ 14 | 80%|██████████████████████████▍ | 4/5 [00:03<00:00, 1.22it/s] 15 | """ -------------------------------------------------------------------------------- 
/code_snippets/python/namedtuple_example.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | 3 | Person = namedtuple("Person", "name gender") 4 | 5 | oliver = Person("Oliver", "male") 6 | khuyen = Person("Khuyen", "female") 7 | 8 | print(oliver) 9 | # Person(name='Oliver', gender='male') 10 | 11 | print(khuyen) 12 | # Person(name='Khuyen', gender='female') 13 | 14 | print(oliver.name) 15 | # Oliver 16 | -------------------------------------------------------------------------------- /code_snippets/cool_tools/schedule_example.py: -------------------------------------------------------------------------------- 1 | import schedule 2 | import time 3 | 4 | def get_incoming_data(): 5 | print("Get incoming data") 6 | 7 | def train_model(): 8 | print("Retraining model") 9 | 10 | schedule.every().day.at("10:30").do(get_incoming_data) 11 | schedule.every().wednesday.at("08:00").do(train_model) 12 | 13 | while True: 14 | schedule.run_pending() 15 | time.sleep(1) -------------------------------------------------------------------------------- /code_snippets/pandas/pd_reindex.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | s = pd.Series([1, 2, 3], index=['2021-07-20', '2021-07-23', '2021-07-25']) 4 | s.index = pd.to_datetime(s.index) 5 | print(s) 6 | 7 | # Get dates ranging from 2021/7/20 to 2021/7/25 8 | new_index = pd.date_range('2021-07-20', '2021-07-25') 9 | 10 | # Conform Series to new index 11 | new_s = s.reindex(new_index, fill_value=0) 12 | print(new_s) -------------------------------------------------------------------------------- /code_snippets/pandas/select_columns_start_with.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | df = pd.DataFrame({"col1": [1, 2, 3], "col2": [3, 4, 5], "year": [2019, 2019, 2020]}) 4 | 5 | chosen_cols = 
df.columns.str.startswith('col') 6 | print(chosen_cols) 7 | """[ True True False]""" 8 | 9 | print(df.loc[:, chosen_cols]) 10 | """ 11 | col1 col2 12 | 0 1 3 13 | 1 2 4 14 | 2 3 5 15 | """ 16 | -------------------------------------------------------------------------------- /code_snippets/python/compare_execution_time.py: -------------------------------------------------------------------------------- 1 | import time 2 | import timeit 3 | 4 | def func(): 5 | """comprehension""" 6 | l = [i for i in range(10_000)] 7 | 8 | def func2(): 9 | """list range""" 10 | l = list(range(10_000)) 11 | 12 | expSize = 1000 13 | time1 = timeit.timeit(func, number=expSize) 14 | time2 = timeit.timeit(func2, number=expSize) 15 | 16 | print(time1/time2) 17 | # 1.738841810509789 -------------------------------------------------------------------------------- /code_snippets/python/defaultdict_example.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | 3 | # Instead of this 4 | food_price = {'apple': [], 'orange': []} 5 | 6 | # Use this 7 | food_price = defaultdict(list) 8 | 9 | for i in range(1, 4): 10 | food_price['apple'].append(i) 11 | food_price['orange'].append(i) 12 | 13 | print(food_price.items()) 14 | # dict_items([('apple', [1, 2, 3]), ('orange', [1, 2, 3])]) 15 | -------------------------------------------------------------------------------- /code_snippets/pandas/df_fillna.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | df = pd.DataFrame({"a": [1, np.nan, 3], "b": [4, 5, np.nan], "c": [1, 2, 3]}) 5 | print(df) 6 | """ 7 | a b c 8 | 0 1.0 4.0 1 9 | 1 NaN 5.0 2 10 | 2 3.0 NaN 3 11 | """ 12 | 13 | df = df.fillna(method="ffill") 14 | print(df) 15 | """ 16 | a b c 17 | 0 1.0 4.0 1 18 | 1 1.0 5.0 2 19 | 2 3.0 5.0 3 20 | """ 21 | -------------------------------------------------------------------------------- 
/code_snippets/pandas/s.is_in.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) 4 | print(df) 5 | """ 6 | a b 7 | 0 1 4 8 | 1 2 5 9 | 2 3 6 10 | """ 11 | 12 | l = [1, 2, 6, 7] 13 | print(df.a.isin(l)) 14 | """ 15 | 0 True 16 | 1 True 17 | 2 False 18 | Name: a, dtype: bool 19 | """ 20 | 21 | df = df[df.a.isin(l)] 22 | print(df) 23 | """ 24 | a b 25 | 0 1 4 26 | 1 2 5 27 | """ -------------------------------------------------------------------------------- /code_snippets/python/__str__and__repr.py: -------------------------------------------------------------------------------- 1 | class Food: 2 | def __init__(self, name: str, color: str): 3 | self.name = name 4 | self.color = color 5 | 6 | def __str__(self): 7 | return f'{self.color} {self.name}' 8 | 9 | def __repr__(self): 10 | return f'Food({self.color}, {self.name})' 11 | 12 | food = Food('apple', 'red') 13 | 14 | print(food) # __str__ 15 | 16 | print(repr(food)) # __repr__ 17 | -------------------------------------------------------------------------------- /code_snippets/python/datetime_timedelta.py: -------------------------------------------------------------------------------- 1 | from datetime import date, datetime, timedelta 2 | 3 | beginning = '2020/01/03 23:59:00' 4 | duration_in_minutes = 2500 5 | 6 | # Find the beginning time 7 | beginning = datetime.strptime(beginning, '%Y/%m/%d %H:%M:%S') 8 | 9 | # Find duration in days 10 | days = timedelta(minutes=duration_in_minutes) 11 | 12 | # Find end time 13 | end = beginning + days 14 | print(end) 15 | # 2020-01-05 17:39:00 -------------------------------------------------------------------------------- /code_snippets/python/improve_json_readability.py: -------------------------------------------------------------------------------- 1 | import json 2 | pet = dict( 3 | kind="dog", 4 | name= "Bim Bim", 5 | age=7, 6 | favorite_food='yogurt' 7 | ) 8
| print(json.dumps(pet)) 9 | # {"kind": "dog", "name": "Bim Bim", "age": 7, "favorite_food": "yogurt"} 10 | 11 | print(json.dumps(pet, indent=4)) 12 | """ 13 | { 14 | "kind": "dog", 15 | "name": "Bim Bim", 16 | "age": 7, 17 | "favorite_food": "yogurt" 18 | } 19 | """ -------------------------------------------------------------------------------- /code_snippets/cool_tools/rich_console.py: -------------------------------------------------------------------------------- 1 | # pip install rich 2 | from rich import console 3 | from rich.console import Console 4 | import pandas as pd 5 | 6 | console = Console() 7 | 8 | data = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) 9 | 10 | def edit_data(data): 11 | var_1 = 45 12 | var_2 = 30 13 | var_3 = var_1 + var_2 14 | data['a'] = [var_1, var_2, var_3] 15 | console.log(data, log_locals=True) 16 | 17 | edit_data(data) -------------------------------------------------------------------------------- /code_snippets/python/itertools_combinations_example.py: -------------------------------------------------------------------------------- 1 | from itertools import combinations 2 | num_list = [1, 2, 3] 3 | for i in num_list: # instead of this 4 | for j in num_list: 5 | if i != j: 6 | print(i, j) 7 | """ 8 | 1 2 9 | 1 3 10 | 2 1 11 | 2 3 12 | 3 1 13 | 3 2 14 | """ 15 | comb = combinations(num_list, 2) # use this 16 | for pair in list(comb): 17 | print(pair) 18 | """ 19 | (1, 2) 20 | (1, 3) 21 | (2, 3) 22 | """ -------------------------------------------------------------------------------- /code_snippets/data_science_tools/geopy_example.py: -------------------------------------------------------------------------------- 1 | # pip install geopy 2 | 3 | from geopy.geocoders import Nominatim 4 | geolocator = Nominatim(user_agent='find_location') 5 | location = geolocator.geocode('30 North Circle Drive, Edwardsville, IL') 6 | 7 | print(location.address) 8 | # 30, Circle Drive, Edwardsville, Madison County, Illinois, 62025, United States 
import matplotlib.pyplot as plt
import seaborn as sns
from wordfreq import word_frequency

# Frequency of a single word in English text, as a proportion of all words.
print(word_frequency("eat", "en"))  # 0.000135
print(word_frequency("the", "en"))  # 0.0537

sentence = "There is a dog running in a park"
words = sentence.split()
word_frequencies = [word_frequency(word, "en") for word in words]

# x and y must be passed by keyword: seaborn >= 0.12 only accepts ``data``
# positionally, so ``sns.barplot(words, word_frequencies)`` raises TypeError.
sns.barplot(x=words, y=word_frequencies)
plt.show()
-------------------------------------------------------------------------------- /code_snippets/data_science_tools/dtreeviz_example.py: -------------------------------------------------------------------------------- 1 | from dtreeviz.trees import dtreeviz 2 | from sklearn import tree 3 | from sklearn.datasets import load_wine 4 | 5 | wine = load_wine() 6 | classifier = tree.DecisionTreeClassifier(max_depth=2) 7 | classifier.fit(wine.data, wine.target) 8 | 9 | vis = dtreeviz( 10 | classifier, 11 | wine.data, 12 | wine.target, 13 | target_name="wine_type", 14 | feature_names=wine.feature_names, 15 | ) 16 | 17 | vis.view() 18 | -------------------------------------------------------------------------------- /code_snippets/data_science_tools/pytest_example.py: -------------------------------------------------------------------------------- 1 | from textblob import TextBlob 2 | 3 | def extract_sentiment(text: str): 4 | '''Extract sentiment using textblob. 5 | Polarity is within range [-1, 1]''' 6 | 7 | text = TextBlob(text) 8 | 9 | return text.sentiment.polarity 10 | 11 | def test_extract_sentiment_negative(): 12 | 13 | text = "I do not think this will turn out well" 14 | 15 | sentiment = extract_sentiment(text) 16 | 17 | assert sentiment < 0 -------------------------------------------------------------------------------- /code_snippets/pandas/pd_series_str.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | fruits = pd.Series(['Orange', 'Apple', 'Grape']) 4 | print(fruits) 5 | """ 6 | 0 Orange 7 | 1 Apple 8 | 2 Grape 9 | dtype: object 10 | """ 11 | 12 | print(fruits.str.lower()) 13 | """ 14 | 0 orange 15 | 1 apple 16 | 2 grape 17 | dtype: object 18 | """ 19 | 20 | print(fruits.str.lower().str.replace("e", "a")) 21 | """ 22 | 0 oranga 23 | 1 appla 24 | 2 grapa 25 | dtype: object 26 | """ 27 | 28 | -------------------------------------------------------------------------------- 
def get_multiples_of_n(nums: list, n: int) -> list:
    """Return the numbers in ``nums`` that are multiples of ``n``.

    A number is kept when dividing it by ``n`` leaves a remainder of 0.
    The relative order of ``nums`` is preserved and ``nums`` is not modified.

    Args:
        nums: Numbers to filter.
        n: The divisor. ``0`` is accepted: the only multiple of 0 is 0
            itself, so only zeros are returned.

    Returns:
        A new list with the multiples of ``n`` found in ``nums``.
    """
    if n == 0:
        # ``num % 0`` raises ZeroDivisionError, so handle the divisor 0
        # explicitly instead of crashing halfway through the filter.
        return [num for num in nums if num == 0]
    return [num for num in nums if num % n == 0]
-------------------------------------------------------------------------------- /code_snippets/python/collections_counter.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | 3 | char_list = ['a', 'b', 'c', 'a', 'd', 'b', 'b'] 4 | 5 | # Instead of this 6 | custom_counter = {} 7 | for char in char_list: 8 | if char not in custom_counter: 9 | custom_counter[char] = 1 10 | else: 11 | custom_counter[char] += 1 12 | 13 | print(custom_counter) 14 | # {'a': 2, 'b': 3, 'c': 1, 'd': 1} 15 | 16 | # Use this 17 | print(Counter(char_list)) 18 | # Counter({'b': 3, 'a': 2, 'c': 1, 'd': 1}) -------------------------------------------------------------------------------- /code_snippets/pandas/df_pivot.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | df = pd.DataFrame( 4 | { 5 | "item": ["apple", "apple", "apple", "apple", "apple"], 6 | "size": ["small", "small", "large", "large", "large"], 7 | "location": ["Walmart", "Aldi", "Walmart", "Aldi", "Aldi"], 8 | "price": [3, 2, 4, 3, 2.5], 9 | } 10 | ) 11 | 12 | print(df) 13 | 14 | pivot = pd.pivot_table( 15 | df, values="price", index=["item", "size"], columns=["location"], aggfunc="mean" 16 | ) 17 | print(pivot) 18 | -------------------------------------------------------------------------------- /code_snippets/cool_tools/icecream_example.py: -------------------------------------------------------------------------------- 1 | from icecream import ic 2 | 3 | def plus_one(num): 4 | return num + 1 5 | 6 | # Instead of this 7 | print('output of plus_on with num = 1:', plus_one(1)) 8 | print('output of plus_on with num = 2:', plus_one(2)) 9 | 10 | # Use this 11 | ic(plus_one(1)) 12 | ic(plus_one(2)) 13 | 14 | # One your terminal 15 | """ 16 | $ python icecream_example. 
import pandas as pd

df = pd.DataFrame({'col1': ['large', 'small', 'mini', 'medium', 'mini'],
                   'col2': [1, 2, 3, 4, 5]})
# Desired sort order, from first to last.
ordered_sizes = 'large', 'medium', 'small', 'mini'

# ``set_categories`` returns a new Series; its ``inplace`` argument was
# removed in pandas 2.0, so the result has to be assigned back to the column.
df['col1'] = (
    df['col1']
    .astype('category')
    .cat.set_categories(list(ordered_sizes), ordered=True)
)

# Sorting an ordered categorical follows the category order, not the alphabet.
print(df.sort_values(by='col1'))
"""
     col1  col2
0   large     1
3  medium     4
1   small     2
2    mini     3
4    mini     5
"""
def text_contain_word(word: str, text: str) -> bool:
    '''Find whether the text contains a particular word.

    The match is on whole words: ``word`` must not be directly preceded or
    followed by another word character, so ``'her'`` is not found in
    ``'There is nothing here'``. Matching is case-sensitive.

    Args:
        word: The word to look for. An empty string is never found.
        text: The text to search.

    Returns:
        True if ``word`` occurs in ``text`` as a standalone word.
    '''
    import re  # local import: only this function needs it

    if not word:
        return False

    # Look-arounds instead of ``\\b`` so that words that start or end with a
    # non-word character (e.g. "c++") are still delimited correctly.
    pattern = rf'(?<!\w){re.escape(word)}(?!\w)'
    return re.search(pattern, text) is not None
print(list(range(sample_range))) 14 | """ 15 | Traceback (most recent call last): 16 | File "code_snippets/python/args_example.py", line 9, in 17 | print(list(range(sample_range))) 18 | TypeError: 'tuple' object cannot be interpreted as an integer 19 | """ 20 | 21 | 22 | -------------------------------------------------------------------------------- /code_snippets/data_science_tools/getname_example.py: -------------------------------------------------------------------------------- 1 | # pip install dill 2 | 3 | from sklearn.linear_model import LogisticRegression 4 | from sklearn.neighbors import KNeighborsClassifier 5 | from dill.source import getname 6 | 7 | def save_model(model): 8 | model_name = getname(model) 9 | print(f"Saving model as model/{model_name}.pkl") 10 | 11 | save_model(KNeighborsClassifier) 12 | save_model(LogisticRegression) 13 | 14 | """Output 15 | Saving model as model/KNeighborsClassifier.pkl 16 | Saving model as model/LogisticRegression.pkl 17 | """ -------------------------------------------------------------------------------- /code_snippets/pandas/df_diff.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | df = pd.DataFrame({'a': [1, 2, 3, 4], 'b': [2, 3, 4, 6]}) 3 | diff = df.diff() 4 | print(diff) 5 | """ 6 | a b 7 | 0 NaN NaN 8 | 1 1.0 1.0 9 | 2 1.0 1.0 10 | 3 1.0 2.0 11 | """ 12 | 13 | shift = diff.shift(-1) 14 | print(shift) 15 | """ 16 | a b 17 | 0 1.0 1.0 18 | 1 1.0 1.0 19 | 2 1.0 2.0 20 | 3 NaN NaN 21 | """ 22 | 23 | processed_df = shift.dropna() 24 | print(processed_df) 25 | """ 26 | a b 27 | 0 1.0 1.0 28 | 1 1.0 1.0 29 | 2 1.0 2.0 30 | """ -------------------------------------------------------------------------------- /code_snippets/data_science_tools/graphviz_example.py: -------------------------------------------------------------------------------- 1 | # pip install graphviz 2 | from graphviz import Graph 3 | 4 | # Instantiate a new Graph object 5 | dot = 
Graph('Data Science Process', format='png') 6 | 7 | # Add nodes 8 | dot.node('A', 'Get Data') 9 | dot.node('B', 'Clean, Prepare, & Manipulate Data') 10 | dot.node('C', 'Train Model') 11 | dot.node('D', 'Test Data') 12 | dot.node('E', 'Improve') 13 | 14 | # Connect these nodes 15 | dot.edges(['AB', 'BC', 'CD', 'DE']) 16 | 17 | # Save chart 18 | dot.render('data_science_flowchart', view=True) 19 | -------------------------------------------------------------------------------- /code_snippets/python/abc_example.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | class Animal(ABC): 4 | 5 | def __init__(self, name: str): 6 | self.name = name 7 | super().__init__() 8 | 9 | @abstractmethod 10 | def make_sound(self): 11 | pass 12 | 13 | class Dog(Animal): 14 | def make_sound(self): 15 | print(f'{self.name} says: Woof') 16 | 17 | class Cat(Animal): 18 | def make_sound(self): 19 | print(f'{self.name} says: Meows') 20 | 21 | Dog('Pepper').make_sound() 22 | Cat('Bella').make_sound() -------------------------------------------------------------------------------- /code_snippets/pandas/df_datetime_comparison.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | df = pd.DataFrame( 4 | {"date": pd.date_range(start="2021-7-19", end="2021-7-23"), "value": list(range(5))} 5 | ) 6 | print(df) 7 | """ 8 | date value 9 | 0 2021-07-19 0 10 | 1 2021-07-20 1 11 | 2 2021-07-21 2 12 | 3 2021-07-22 3 13 | 4 2021-07-23 4 14 | """ 15 | 16 | filtered_df = df[df.date <= "2021-07-21"] 17 | print(filtered_df) 18 | """ 19 | date value 20 | 0 2021-07-19 0 21 | 1 2021-07-20 1 22 | 2 2021-07-21 2 23 | """ 24 | -------------------------------------------------------------------------------- /code_snippets/data_science_tools/pytest_fixture.py: -------------------------------------------------------------------------------- 1 | # pip install textblob 2 | 3 | 
import pytest 4 | from textblob import TextBlob 5 | 6 | def extract_sentiment(text: str): 7 | """Extract sentimetn using textblob. Polarity is within range [-1, 1]""" 8 | 9 | text = TextBlob(text) 10 | return text.sentiment.polarity 11 | 12 | @pytest.fixture 13 | def example_data(): 14 | return 'Today I found a duck and I am happy' 15 | 16 | def test_extract_sentiment(example_data): 17 | sentiment = extract_sentiment(example_data) 18 | assert sentiment > 0 -------------------------------------------------------------------------------- /code_snippets/data_science_tools/datefinder_example.py: -------------------------------------------------------------------------------- 1 | # pip install datefinder 2 | 3 | from datefinder import find_dates 4 | 5 | text = """"We have one meeting on May 17th, 6 | 2021 at 9:00am and another meeting on 5/18/2021 7 | at 10:00. I hope you can attend one of the 8 | meetings.""" 9 | 10 | matches = find_dates(text) 11 | 12 | for match in matches: 13 | print("Date and time:", match) 14 | print("Only day:", match.day) 15 | 16 | """Output: 17 | Date and time: 2021-05-17 09:00:00 18 | Only day: 17 19 | Date and time: 2021-05-18 10:00:00 20 | Only day: 18 21 | """ -------------------------------------------------------------------------------- /code_snippets/data_science_tools/fastai_cont_cat_split.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from fastai.tabular.core import cont_cat_split 3 | 4 | df = pd.DataFrame( 5 | { 6 | "col1": [1, 2, 3, 4, 5], 7 | "col2": ["a", "b", "c", "d", "e"], 8 | "col3": [1.0, 2.0, 3.0, 4.0, 5.0], 9 | } 10 | ) 11 | 12 | cont_names, cat_names = cont_cat_split(df) 13 | print(cont_names) # ['col3'] 14 | print(cat_names) # ['col1', 'col2'] 15 | 16 | cont_names, cat_names = cont_cat_split(df, max_card=3) 17 | print(cont_names) # ['col1', 'col3'] 18 | print(cat_names) # ['col2'] 19 | 
-------------------------------------------------------------------------------- /code_snippets/python/built_in_functions_speed.py: -------------------------------------------------------------------------------- 1 | from timeit import timeit 2 | from numpy.random import randint 3 | 4 | def built_in_sum(l: list): 5 | return sum(l) 6 | 7 | def custom_sum(l: list): 8 | sum_val = 0 9 | for num in l: 10 | sum_val += num 11 | return sum_val 12 | 13 | l = randint(0, 100, size=100_000) 14 | expSize = 100 15 | 16 | built_in_time = timeit("built_in_sum(l)", number=expSize, globals=globals()) 17 | custom_time = timeit("custom_sum(l)", number=expSize, globals=globals()) 18 | print(custom_time/built_in_time) 19 | # 1.2499071011706575 -------------------------------------------------------------------------------- /code_snippets/data_science_tools/texthero_examples.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import texthero as hero 4 | 5 | df = pd.DataFrame( 6 | { 7 | "text": [ 8 | "Today is a beautiful day", 9 | "There are 3 ducks in this pond", 10 | "This is. 
very cool.", 11 | np.nan, 12 | ] 13 | } 14 | ) 15 | 16 | print(df.text.pipe(hero.clean)) 17 | """ 18 | 0 today beautiful day 19 | 1 ducks pond 20 | 2 cool 21 | 3 22 | Name: text, dtype: object 23 | """ 24 | -------------------------------------------------------------------------------- /code_snippets/pandas/get_data_in_year_range.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from datetime import datetime 3 | df = pd.DataFrame({'date': [datetime(2018, 10, 1), 4 | datetime(2019, 10, 1), 5 | datetime(2020, 10, 1)], 6 | 'val': [1, 2, 3]}).set_index('date') 7 | 8 | print(df) 9 | """ 10 | val 11 | date 12 | 2018-10-01 1 13 | 2019-10-01 2 14 | 2020-10-01 3 15 | """ 16 | print(df.loc['2019':]) 17 | """ 18 | val 19 | date 20 | 2019-10-01 2 21 | 2020-10-01 3 22 | """ -------------------------------------------------------------------------------- /code_snippets/cool_tools/icecream_datetime.py: -------------------------------------------------------------------------------- 1 | # pip install icecream 2 | from datetime import datetime 3 | from icecream import ic 4 | import time 5 | from datetime import datetime 6 | 7 | message = "I don't have prefix" 8 | ic(message) 9 | 10 | def time_format(): 11 | return f'{datetime.now()}|> ' 12 | 13 | ic.configureOutput(prefix=time_format) 14 | for _ in range(3): 15 | time.sleep(1) 16 | ic('Hello') 17 | 18 | """ 19 | ic| message: "I don't have prefix" 20 | 2021-06-20 08:12:21.777664|> 'Hello' 21 | 2021-06-20 08:12:22.780577|> 'Hello' 22 | 2021-06-20 08:12:23.782396|> 'Hello' 23 | """ -------------------------------------------------------------------------------- /code_snippets/python/classmethod_example.py: -------------------------------------------------------------------------------- 1 | class Solver: 2 | def __init__(self, nums: list): 3 | self.nums = nums 4 | 5 | @classmethod 6 | def get_even(cls, nums: list): 7 | return cls([num for num in nums if num % 2 == 0]) 8 | 
import os
from itertools import islice

from github import Github

# Never commit credentials. The access token is read from the environment:
#   $ export GITHUB_TOKEN=<your personal access token>
# Without it the client falls back to anonymous (rate-limited) access.
# NOTE(review): the token that used to be hard-coded here is exposed in the
# repository history and should be revoked.
g = Github(os.environ.get('GITHUB_TOKEN'))

# search_topics returns a lazily paginated result; islice stops after the
# first 10 topics instead of iterating over every page of results.
for repo in islice(g.search_topics('machine learning'), 10):
    print(repo.name)

"""
machine-learning
deep-learning
scikit-learn
jupyter-notebook
scikitlearn-machine-learning
coursera
unsupervised-machine-learning
supervised-machine-learning
coursera-machine-learning
adversarial-machine-learning
"""
import time
from functools import wraps


def time_func(func):
    """Decorate ``func`` so every call reports how long it took.

    The wrapper prints a message before and after the call, followed by the
    elapsed time in seconds. It forwards all positional and keyword arguments
    to ``func`` and returns whatever ``func`` returns, so it can be applied to
    any callable, not only to functions without parameters.

    Args:
        func: The callable to time.

    Returns:
        A wrapper with the same name, docstring and signature metadata as
        ``func``.
    """

    @wraps(func)  # keep func.__name__ / __doc__ on the wrapper
    def wrapper(*args, **kwargs):
        print("This happens before the function is called")
        # perf_counter is monotonic and has the highest available resolution,
        # which matters for calls that last only a few microseconds.
        start = time.perf_counter()
        result = func(*args, **kwargs)
        # Stop the clock right after the call so the print below is not timed.
        end = time.perf_counter()
        print('This happens after the funciton is called')
        print('The duration is', end - start, 's')
        return result

    return wrapper
for res in result: 13 | print(res) 14 | """ 15 | How to execute a program or call a system command from Python 16 | What are metaclasses in Python? 17 | Does Python have a ternary conditional operator? 18 | Convert bytes to a string 19 | Does Python have a string 'contains' substring method? 20 | How to check version of python modules? 21 | """ -------------------------------------------------------------------------------- /code_snippets/cool_tools/pyfiglet_example.py: -------------------------------------------------------------------------------- 1 | # pip install pyfiglet 2 | # pip install termcolor 3 | 4 | import pyfiglet 5 | from termcolor import colored, cprint 6 | 7 | out = pyfiglet.figlet_format("Hello") 8 | print(out) 9 | """ 10 | __ __ ____ 11 | / / / /__ / / /___ 12 | / /_/ / _ \/ / / __ \ 13 | / __ / __/ / / /_/ / 14 | /_/ /_/\___/_/_/\____/ 15 | """ 16 | 17 | out = pyfiglet.figlet_format("Hello", font='slant') 18 | print(out) 19 | 20 | cprint(pyfiglet.figlet_format('Hello', font='bell'), 'blue') 21 | """ 22 | __ __ . . 23 | | | ___ | | __. 24 | |___| .' ` | | .' \ 25 | | | |----' | | | | 26 | / / `.___, /\__ /\__ `._.' 
27 | """ -------------------------------------------------------------------------------- /code_snippets/pandas/df_merge.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | df1 = pd.DataFrame({'left_key': [1, 2, 3], 'a': [4, 5, 6]}) 4 | df2 = pd.DataFrame({'right_key': [1, 2, 3], 'a': [5, 6, 7]}) 5 | print(df1.merge(df2, left_on='left_key', right_on='right_key')) 6 | """ 7 | left_key a_x right_key a_y 8 | 0 1 4 1 5 9 | 1 2 5 2 6 10 | 2 3 6 3 7 11 | """ 12 | 13 | print(df1.merge(df2, left_on='left_key', right_on='right_key', 14 | suffixes=('_left', '_right'))) 15 | """ 16 | left_key a_left right_key a_right 17 | 0 1 4 1 5 18 | 1 2 5 2 6 19 | 2 3 6 3 7 20 | """ 21 | -------------------------------------------------------------------------------- /code_snippets/pandas/df_rolling.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from datetime import date 3 | 4 | df = pd.DataFrame({'date': [date(2021, 1, 20), date(2021, 1, 21), date(2021, 1, 22), 5 | date(2021, 1, 23), date(2021, 1, 24)], 6 | 'value': [1, 2, 3, 4, 5]}).set_index('date') 7 | 8 | print(df) 9 | """ 10 | value 11 | date 12 | 2021-01-20 1 13 | 2021-01-21 2 14 | 2021-01-22 3 15 | 2021-01-23 4 16 | 2021-01-24 5 17 | """ 18 | 19 | print(df.rolling(3).mean()) 20 | """ 21 | value 22 | date 23 | 2021-01-20 NaN 24 | 2021-01-21 NaN 25 | 2021-01-22 2.0 26 | 2021-01-23 3.0 27 | 2021-01-24 4.0 28 | """ 29 | -------------------------------------------------------------------------------- /code_snippets/python/itertools_examples.py: -------------------------------------------------------------------------------- 1 | from itertools import product 2 | 3 | params = { 4 | "learning_rate": [1e-1, 1e-2, 1e-3], 5 | "batch_size": [16, 32, 64], 6 | } 7 | 8 | for vals in product(*params.values()): 9 | combination = dict(zip(params.keys(), vals)) 10 | print(combination) 11 | """ 12 | {'learning_rate': 0.1, 
'batch_size': 16} 13 | {'learning_rate': 0.1, 'batch_size': 32} 14 | {'learning_rate': 0.1, 'batch_size': 64} 15 | {'learning_rate': 0.01, 'batch_size': 16} 16 | {'learning_rate': 0.01, 'batch_size': 32} 17 | {'learning_rate': 0.01, 'batch_size': 64} 18 | {'learning_rate': 0.001, 'batch_size': 16} 19 | {'learning_rate': 0.001, 'batch_size': 32} 20 | {'learning_rate': 0.001, 'batch_size': 64} 21 | """ 22 | -------------------------------------------------------------------------------- /code_snippets/cool_tools/getme_forecast.sh: -------------------------------------------------------------------------------- 1 | getme forecast -f 2 | 3 | :' 4 | Collinsville, United States 5 | 6 | Mon Jun 21 2021 | 12 h 7 | ☁️ Clouds | Overcast clouds 8 | Temperature 65.14 °F 9 | Min. 65.14 °F | Max. 65.14 °F 10 | 11 | 12 | Tue Jun 22 2021 | 12 h 13 | ☀️ Clear | Clear sky 14 | Temperature 54.16 °F 15 | Min. 54.16 °F | Max. 54.16 °F 16 | 17 | 18 | Wed Jun 23 2021 | 12 h 19 | ☁️ Clouds | Scattered clouds 20 | Temperature 57.18 °F 21 | Min. 57.18 °F | Max. 57.18 °F 22 | 23 | 24 | Thu Jun 24 2021 | 12 h 25 | ☀️ Clear | Clear sky 26 | Temperature 63.93 °F 27 | Min. 63.93 °F | Max. 63.93 °F 28 | 29 | 30 | Fri Jun 25 2021 | 12 h 31 | 🌦️ Rain | Moderate rain 32 | Temperature 71.78 °F 33 | Min. 71.78 °F | Max. 
71.78 34 | ' -------------------------------------------------------------------------------- /code_snippets/data_science_tools/docopt_example.py: -------------------------------------------------------------------------------- 1 | """Extract keywords of an input file 2 | 3 | Usage: 4 | docopt_example.py --data-dir= [--input-path=] 5 | 6 | Options: 7 | --data-dir= Directory of the data 8 | --input-path= Name of the input file [default: input_text.txt] 9 | """ 10 | 11 | from docopt import docopt 12 | 13 | if __name__ == '__main__': 14 | args = docopt(__doc__, argv=None, help=True) 15 | data_dir = args['--data-dir'] 16 | input_path = args['--input-path'] 17 | 18 | if data_dir: 19 | print(f"Extracting keywords from {data_dir}/{input_path}") 20 | 21 | """ 22 | $ python docopt_example.py --data-dir=Data 23 | Extracting keywords from Data/input_text.txt 24 | """ -------------------------------------------------------------------------------- /code_snippets/pandas/reduce_memory.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets import load_iris 2 | import pandas as pd 3 | 4 | X, y = load_iris(as_frame=True, return_X_y=True) 5 | df = pd.concat([X, pd.DataFrame(y, columns=['target'])], axis=1) 6 | print(df.memory_usage()) 7 | """ 8 | Index 128 9 | sepal length (cm) 1200 10 | sepal width (cm) 1200 11 | petal length (cm) 1200 12 | petal width (cm) 1200 13 | target 1200 14 | dtype: int64 15 | """ 16 | df['target'] = df['target'].astype('category') 17 | print(df.memory_usage()) 18 | """ 19 | Index 128 20 | sepal length (cm) 1200 21 | sepal width (cm) 1200 22 | petal length (cm) 1200 23 | petal width (cm) 1200 24 | target 282 25 | dtype: int64 26 | """ 27 | -------------------------------------------------------------------------------- /code_snippets/data_science_tools/datapane_example.py: -------------------------------------------------------------------------------- 1 | # pip install datapane 2 | # pip install 
plotly 3 | 4 | import datapane as dp 5 | import pandas as pd 6 | import numpy as np 7 | import plotly.express as px 8 | 9 | # Scripts to create df and chart 10 | df = px.data.gapminder() 11 | 12 | chart = px.scatter(df.query("year==2007"), x="gdpPercap", y="lifeExp", 13 | size="pop", color="continent", 14 | hover_name="country", log_x=True, size_max=60) 15 | 16 | # Once you have the df and the chart, simply use 17 | r = dp.Report( 18 | dp.Text("my simple report"), # add description 19 | dp.DataTable(df), # create a table 20 | dp.Plot(chart) # create a chart 21 | ) 22 | 23 | # Publish your report 24 | r.publish(name='example', visibility = dp.Visibility.PUBLIC) -------------------------------------------------------------------------------- /code_snippets/data_science_tools/textblob_example.py: -------------------------------------------------------------------------------- 1 | # pip install textblob 2 | # python -m textblob.download_corpora 3 | 4 | from textblob import TextBlob 5 | 6 | text = "Today is a beautiful day" 7 | blob = TextBlob(text) 8 | 9 | print(blob.words) # Word tokenization 10 | # ['Today', 'is', 'a', 'beautiful', 'day'] 11 | 12 | print(blob.noun_phrases) # Noun phrase extraction 13 | # ['beautiful day'] 14 | 15 | print(blob.sentiment) # Sentiment analysis 16 | # Sentiment(polarity=0.85, subjectivity=1.0) 17 | 18 | print(blob.word_counts) # Word counts 19 | # defaultdict(, {'today': 1, 'is': 1, 'a': 1, 'beautiful': 1, 'day': 1}) 20 | 21 | # Spelling correction 22 | text = "Today is a beutiful day" 23 | blob = TextBlob(text) 24 | print(blob.correct()) 25 | # Today is a beautiful day 26 | -------------------------------------------------------------------------------- /code_snippets/python/property_decorator.py: -------------------------------------------------------------------------------- 1 | class Fruit: 2 | def __init__(self, name: str, color: str): 3 | self._name = name 4 | self._color = color 5 | 6 | @property 7 | def color(self): 8 | 
print("The color of the fruit is:") 9 | return self._color 10 | 11 | @color.setter 12 | def color(self, value): 13 | print("Setting value of color...") 14 | if self._color is None: 15 | if not isinstance(value, str): 16 | raise ValueError('color must be of type string') 17 | self._color = value 18 | else: 19 | raise AttributeError("Sorry, you cannot change a fruit's color!") 20 | 21 | fruit = Fruit('apple', 'red') 22 | print(fruit.color) 23 | fruit.color = 'yellow' 24 | -------------------------------------------------------------------------------- /code_snippets/pandas/dataframe_pipe.py: -------------------------------------------------------------------------------- 1 | # pip install textblob 2 | import pandas as pd 3 | from textblob import TextBlob 4 | 5 | def remove_white_space(df: pd.DataFrame): 6 | df['text'] = df['text'].apply(lambda row: row.strip()) 7 | return df 8 | 9 | def get_sentiment(df: pd.DataFrame): 10 | df['sentiment'] = df['text'].apply(lambda row: 11 | TextBlob(row).sentiment[0]) 12 | return df 13 | 14 | df = pd.DataFrame({'text': ["It is a beautiful day today ", 15 | " This movie is terrible"]}) 16 | 17 | df = (df.pipe(remove_white_space) 18 | .pipe(get_sentiment) 19 | ) 20 | 21 | print(df) 22 | """ 23 | text sentiment 24 | 0 It is a beautiful day today 0.85 25 | 1 This movie is terrible -1.00 26 | """ -------------------------------------------------------------------------------- /code_snippets/data_science_tools/causalimpact_example.py: -------------------------------------------------------------------------------- 1 | # pip install pycausalimpact 2 | 3 | import numpy as np 4 | import pandas as pd 5 | from statsmodels.tsa.arima_process import ArmaProcess 6 | from causalimpact import CausalImpact 7 | 8 | # Generate random sample 9 | 10 | np.random.seed(0) 11 | ar = np.r_[1, 0.9] 12 | ma = np.array([1]) 13 | arma_process = ArmaProcess(ar, ma) 14 | 15 | X = 50 + arma_process.generate_sample(nsample=1000) 16 | y = 1.6 * X + 
np.random.normal(size=1000) 17 | 18 | # There is a change starting from index 800 19 | y[800:] += 10 20 | 21 | data = pd.DataFrame({'y': y, 'X': X}, columns=['y', 'X']) 22 | pre_period = [0, 799] 23 | post_period = [800, 999] 24 | 25 | ci = CausalImpact(data, pre_period, post_period) 26 | print(ci.summary()) 27 | print(ci.summary(output='report')) 28 | ci.plot() -------------------------------------------------------------------------------- /code_snippets/cool_tools/decorator_module.py: -------------------------------------------------------------------------------- 1 | from decorator import decorator 2 | from time import time, sleep 3 | 4 | def time_func(func): 5 | def wrapper(*args, **kwargs): 6 | start_time = time() 7 | func(*args, **kwargs) 8 | end_time = time() 9 | print( 10 | f"""It takes {round(end_time - start_time, 3)} seconds to execute the function""" 11 | ) 12 | return wrapper 13 | 14 | @decorator 15 | def time_func_with_decorator(func, *args, **kwargs): 16 | start_time = time() 17 | func(*args, **kwargs) 18 | end_time = time() 19 | print( 20 | f"""It takes {round(end_time - start_time, 3)} seconds to execute the function""" 21 | ) 22 | @time_func_with_decorator 23 | def test_func(): 24 | sleep(1) 25 | 26 | if __name__== '__main__': 27 | test_func() 28 | -------------------------------------------------------------------------------- /code_snippets/data_science_tools/swifter_example.py: -------------------------------------------------------------------------------- 1 | # pip install swifter 2 | from time import time 3 | from sklearn.datasets import fetch_california_housing 4 | from scipy.special import boxcox1p 5 | import swifter 6 | import timeit 7 | 8 | X, y = fetch_california_housing(return_X_y=True, as_frame=True) 9 | 10 | def pandas_apply(): 11 | X["AveRooms"].apply(lambda x: boxcox1p(x, 0.25)) 12 | 13 | 14 | def swifter_apply(): 15 | X["AveRooms"].swifter.apply(lambda x: boxcox1p(x, 0.25)) 16 | 17 | num_experiments = 100 18 | pandas_time = 
timeit.timeit(pandas_apply, number=num_experiments) 19 | swifter_time = timeit.timeit(swifter_apply, number=num_experiments) 20 | 21 | pandas_vs_swifter = round(pandas_time/swifter_time, 2) 22 | print(f'Swifter apply is {pandas_vs_swifter} times faster than Pandas apply') 23 | # Swifter apply is 12.54 times faster than Pandas apply -------------------------------------------------------------------------------- /code_snippets/python/heapq_example.py: -------------------------------------------------------------------------------- 1 | import heapq 2 | import random 3 | from timeit import timeit 4 | 5 | random.seed(0) 6 | l = random.sample(range(0, 10000), 10000) 7 | 8 | def get_n_max_sorting(l: list, n: int): 9 | l = sorted(l, reverse=True) 10 | return l[:n] 11 | 12 | def get_n_max_heapq(l: list, n: int): 13 | return heapq.nlargest(n, l) 14 | 15 | expSize = 1000 16 | n = 100 17 | time_sorting = timeit("get_n_max_sorting(l, n)", number=expSize, 18 | globals=globals()) 19 | time_heapq = timeit('get_n_max_heapq(l, n)', number=expSize, 20 | globals=globals()) 21 | 22 | ratio = round(time_sorting/time_heapq, 3) 23 | print(f'Run {expSize} experiments. Using heapq is {ratio} times' 24 | ' faster than using sorting') 25 | """ 26 | Run 1000 experiments. 
Using heapq is 2.659 times faster than using sorting 27 | """ -------------------------------------------------------------------------------- /code_snippets/data_science_tools/pipeline_gridsearchcv.py: -------------------------------------------------------------------------------- 1 | from sklearn.model_selection import train_test_split, GridSearchCV 2 | from sklearn.preprocessing import StandardScaler 3 | from sklearn.pipeline import make_pipeline 4 | from sklearn.svm import SVC 5 | from sklearn.datasets import load_iris 6 | 7 | # load data 8 | df = load_iris() 9 | X = df.data 10 | y = df.target 11 | 12 | # split data into train and test 13 | X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) 14 | 15 | # Create a pipeline variable 16 | make_pipe = make_pipeline(StandardScaler(), SVC()) 17 | 18 | # Defining parameters grid 19 | grid_params = {'svc__C': [0.1, 1, 10, 100, 1000], 'svc__gamma': [0.1, 1, 10, 100]} 20 | 21 | # hyperparameter tuning 22 | grid = GridSearchCV(make_pipe, grid_params, cv = 5) 23 | grid.fit(X_train, y_train) 24 | 25 | # predict 26 | y_pred = grid.predict(X_test) -------------------------------------------------------------------------------- /code_snippets/data_science_tools/spacy_ngram.py: -------------------------------------------------------------------------------- 1 | # pip install textacy 2 | # pip install spacy 3 | # python -m spacy download en_core_web_sm 4 | 5 | import pandas as pd 6 | import spacy 7 | from textacy.extract import ngrams 8 | 9 | nlp = spacy.load('en_core_web_sm') 10 | 11 | text = nlp('Data science is an inter-disciplinary field that uses' 12 | ' scientific methods, processes, algorithms, and systems to extract' 13 | ' knowledge and insights from many structural and unstructured data.') 14 | 15 | n_grams = 2 # number of contiguous words in each n-gram 16 | min_freq = 1 # minimum number of occurrences for an n-gram to be extracted 17 | 18 | print(pd.Series([n.text for n in ngrams(text, n=n_grams, min_freq=min_freq)]).value_counts()) 19 | """ 20 | 
disciplinary field 1 21 | scientific methods 1 22 | unstructured data 1 23 | Data science 1 24 | extract knowledge 1 25 | uses scientific 1 26 | """ -------------------------------------------------------------------------------- /code_snippets/data_science_tools/pandera_example.py: -------------------------------------------------------------------------------- 1 | # pip install pandera 2 | 3 | import pandera as pa 4 | from pandera import check_input 5 | import pandas as pd 6 | 7 | df = pd.DataFrame({'col1': [5.0, 8.0, 10.0], 8 | 'col2': ['text_1', 'text_2', 'text_3']}) 9 | schema = pa.DataFrameSchema({ 10 | "col1": pa.Column(float, pa.Check(lambda minute: 5 <= minute)), 11 | "col2": pa.Column(str, pa.Check.str_startswith("text_")) 12 | }) 13 | validated_df = schema(df) 14 | print(validated_df) 15 | """ 16 | col1 col2 17 | 0 5.0 text_1 18 | 1 8.0 text_2 19 | 2 10.0 text_3 20 | """ 21 | 22 | @check_input(schema) 23 | def plus_three(df): 24 | df['col1_plus_3'] = df['col1'] + 3 25 | return df 26 | 27 | print(plus_three(df)) 28 | """ 29 | col1 col2 col1_plus_3 30 | 0 5.0 text_1 8.0 31 | 1 8.0 text_2 11.0 32 | 2 10.0 text_3 13.0 33 | """ -------------------------------------------------------------------------------- /code_snippets/pandas/select_dtypes.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | df = pd.DataFrame({'col1': ['a', 'b', 'c'], 'col2': [1, 2, 3], 4 | 'col3': [0.1, 0.2, 0.3]}) 5 | 6 | print(df.info()) 7 | """ 8 | 9 | RangeIndex: 3 entries, 0 to 2 10 | Data columns (total 3 columns): 11 | # Column Non-Null Count Dtype 12 | --- ------ -------------- ----- 13 | 0 col1 3 non-null object 14 | 1 col2 3 non-null int64 15 | 2 col3 3 non-null float64 16 | dtypes: float64(1), int64(1), object(1) 17 | memory usage: 200.0+ bytes 18 | """ 19 | 20 | print(df.select_dtypes(include=['int64', 'float64'])) 21 | """ 22 | col2 col3 23 | 0 1 0.1 24 | 1 2 0.2 25 | 2 3 0.3 26 | """ 27 | 28 | 
print(df.select_dtypes(exclude=['object'])) 29 | """ 30 | col2 col3 31 | 0 1 0.1 32 | 1 2 0.2 33 | 2 3 0.3 34 | """ -------------------------------------------------------------------------------- /code_snippets/cool_tools/notion_example.py: -------------------------------------------------------------------------------- 1 | # pip install notion 2 | 3 | from notion.client import NotionClient 4 | from notion.block import NumberedListBlock 5 | 6 | client = NotionClient("") 7 | 8 | page = client.get_block("https://www.notion.so/Error-shooting-How-to-3c1904c9869b47118b8656df8b2f8d11") 9 | 10 | print(page.title) 11 | # Error shooting / How to 12 | 13 | page.title = "How to / Error shooting" 14 | 15 | print(page.title) 16 | # How to / Error shooting 17 | 18 | for child in page.children: 19 | print(child.title) 20 | 21 | """ 22 | Linux 23 | Changes to the system 24 | Hydra 25 | Python 26 | WordPress 27 | """ 28 | 29 | new = page.children.add_new( 30 | NumberedListBlock, title='Item 1' 31 | ) 32 | 33 | for child in page.children: 34 | print(child.title) 35 | 36 | """ 37 | Linux 38 | Changes to the system 39 | Hydra 40 | Python 41 | WordPress 42 | Item 1 43 | """ 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /code_snippets/data_science_tools/kedro_example.py: -------------------------------------------------------------------------------- 1 | from kedro.pipeline import node, Pipeline 2 | from kedro.io import DataCatalog, MemoryDataSet 3 | from kedro.runner import SequentialRunner 4 | 5 | # Prepare a data catalog 6 | data_catalog = DataCatalog({"data.csv": MemoryDataSet()}) 7 | 8 | # Prepare first node 9 | def process_data(): 10 | return f"processed data" 11 | 12 | process_data_node = node( 13 | func=process_data, inputs=None, outputs="processed_data" 14 | ) 15 | 16 | def train_model(data: str): 17 | return f"Training model using {data}" 18 | 19 | train_model_node = node( 20 | func=train_model, inputs="processed_data", 
outputs="trained_model" 21 | ) 22 | 23 | # Assemble nodes into a pipeline 24 | pipeline = Pipeline([process_data_node, train_model_node]) 25 | 26 | # Create a runner to run the pipeline 27 | runner = SequentialRunner() 28 | print(runner.run(pipeline, data_catalog)) 29 | # {'trained_model': 'Training model using processed data'} -------------------------------------------------------------------------------- /code_snippets/data_science_tools/fastai_df_shrink.py: -------------------------------------------------------------------------------- 1 | # pip install fastai 2 | 3 | from fastai.tabular.core import df_shrink 4 | import pandas as pd 5 | 6 | df = pd.DataFrame({"col1": [1, 2, 3], "col2": [1.0, 2.0, 3.0]}) 7 | print(df.info()) 8 | """ 9 | 10 | RangeIndex: 3 entries, 0 to 2 11 | Data columns (total 2 columns): 12 | # Column Non-Null Count Dtype 13 | --- ------ -------------- ----- 14 | 0 col1 3 non-null int64 15 | 1 col2 3 non-null float64 16 | dtypes: float64(1), int64(1) 17 | memory usage: 176.0 bytes 18 | """ 19 | 20 | new_df = df_shrink(df) 21 | print(new_df.info()) 22 | """ 23 | 24 | RangeIndex: 3 entries, 0 to 2 25 | Data columns (total 2 columns): 26 | # Column Non-Null Count Dtype 27 | --- ------ -------------- ----- 28 | 0 col1 3 non-null int8 29 | 1 col2 3 non-null float32 30 | dtypes: float32(1), int8(1) 31 | memory usage: 143.0 bytes 32 | """ -------------------------------------------------------------------------------- /code_snippets/cool_tools/isort_example.py: -------------------------------------------------------------------------------- 1 | # Before isort 2 | from sklearn.metrics import confusion_matrix, f1_score, classification_report, roc_curve 3 | from sklearn.model_selection import train_test_split 4 | from sklearn.model_selection import GridSearchCV, StratifiedKFold 5 | from sklearn import svm 6 | from sklearn.naive_bayes import GaussianNB, MultinomialNB 7 | from sklearn.neighbors import KNeighborsClassifier 8 | from sklearn.tree import 
DecisionTreeClassifier 9 | from sklearn.model_selection import TimeSeriesSplit 10 | 11 | # On your terminal: isort name_of_your_file.py 12 | from sklearn import svm 13 | from sklearn.metrics import (classification_report, confusion_matrix, f1_score, 14 | roc_curve) 15 | from sklearn.model_selection import (GridSearchCV, StratifiedKFold, 16 | TimeSeriesSplit, train_test_split) 17 | from sklearn.naive_bayes import GaussianNB, MultinomialNB 18 | from sklearn.neighbors import KNeighborsClassifier 19 | from sklearn.tree import DecisionTreeClassifier 20 | -------------------------------------------------------------------------------- /code_snippets/python/singledispatch_example.py: -------------------------------------------------------------------------------- 1 | from functools import singledispatch 2 | 3 | # Without singledispatch 4 | 5 | 6 | def process_data(data): 7 | if isinstance(data, dict): 8 | process_dict(data) 9 | 10 | else: 11 | process_list(data) 12 | 13 | 14 | def process_dict(data: dict): 15 | print("Dict is processed") 16 | 17 | 18 | def process_list(data: list): 19 | print("List is processed") 20 | 21 | 22 | # ---------------------------------------------------------------------------- # 23 | # With singledispatch 24 | 25 | 26 | @singledispatch 27 | def process_data2(data): 28 | raise NotImplementedError("Please implement process_data2") 29 | 30 | 31 | @process_data2.register 32 | def process_dict2(data: dict): 33 | print("Dict is processed") 34 | 35 | 36 | @process_data2.register 37 | def process_list2(data: list): 38 | print("List is processed") 39 | 40 | 41 | data = {"a": [1, 2, 3], "b": [4, 5, 6]} 42 | data2 = [{"a": [1, 2, 3]}, {"b": [4, 5, 6]}] 43 | 44 | process_data2(data) 45 | """Dict is processed""" 46 | 47 | process_data2(data2) 48 | """List is processed""" 49 | -------------------------------------------------------------------------------- /code_snippets/notebook/display_math_equations.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "language_info": { 4 | "codemirror_mode": { 5 | "name": "ipython", 6 | "version": 3 7 | }, 8 | "file_extension": ".py", 9 | "mimetype": "text/x-python", 10 | "name": "python", 11 | "nbconvert_exporter": "python", 12 | "pygments_lexer": "ipython3", 13 | "version": "3.8.1" 14 | }, 15 | "orig_nbformat": 2, 16 | "kernelspec": { 17 | "name": "pythonjvsc74a57bd0a09dca2afe15d58e0a8e12d0967f3bf0e997b36a4eb26c84cbca546ff6ad81d4", 18 | "display_name": "Python 3.8.1 ('venv': venv)" 19 | }, 20 | "metadata": { 21 | "interpreter": { 22 | "hash": "a09dca2afe15d58e0a8e12d0967f3bf0e997b36a4eb26c84cbca546ff6ad81d4" 23 | } 24 | } 25 | }, 26 | "nbformat": 4, 27 | "nbformat_minor": 2, 28 | "cells": [ 29 | { 30 | "cell_type": "code", 31 | "execution_count": 1, 32 | "metadata": {}, 33 | "outputs": [ 34 | { 35 | "output_type": "stream", 36 | "name": "stdout", 37 | "text": [ 38 | "The equation is:\n" 39 | ] 40 | }, 41 | { 42 | "output_type": "display_data", 43 | "data": { 44 | "text/plain": "", 45 | "text/latex": "$\\displaystyle y= 3x+5$" 46 | }, 47 | "metadata": {} 48 | } 49 | ], 50 | "source": [ 51 | "from IPython.display import display, Math, Latex\n", 52 | "\n", 53 | "a = 3\n", 54 | "b = 5\n", 55 | "print(\"The equation is:\")\n", 56 | "display(Math(f'y= {a}x+{b}'))" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [] 65 | } 66 | ] 67 | } -------------------------------------------------------------------------------- /code_snippets/numpy/array_to_latex_example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "serial-beatles", 7 | "metadata": { 8 | "ExecuteTime": { 9 | "end_time": "2021-06-23T13:01:26.090538Z", 10 | "start_time": "2021-06-23T13:01:20.319340Z" 11 | } 
12 | }, 13 | "outputs": [], 14 | "source": [ 15 | "pip install array_to_latex" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 3, 21 | "id": "terminal-tower", 22 | "metadata": { 23 | "ExecuteTime": { 24 | "end_time": "2021-06-23T13:01:31.648086Z", 25 | "start_time": "2021-06-23T13:01:31.422198Z" 26 | } 27 | }, 28 | "outputs": [ 29 | { 30 | "name": "stdout", 31 | "output_type": "stream", 32 | "text": [ 33 | "\\begin{bmatrix}\n", 34 | " 1.00 & 2.00 & 3.00\\\\\n", 35 | " 4.00 & 5.00 & 6.00\n", 36 | "\\end{bmatrix}\n" 37 | ] 38 | } 39 | ], 40 | "source": [ 41 | "import numpy as np \n", 42 | "import array_to_latex as a2l \n", 43 | "\n", 44 | "a = np.array([[1, 2, 3], [4, 5, 6]])\n", 45 | "latex = a2l.to_ltx(a)\n", 46 | "latex" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "id": "parallel-revelation", 52 | "metadata": {}, 53 | "source": [ 54 | "\\begin{bmatrix}\n", 55 | " 1.00 & 2.00 & 3.00\\\\\n", 56 | " 4.00 & 5.00 & 6.00\n", 57 | "\\end{bmatrix}" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "id": "romance-louisiana", 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [] 67 | } 68 | ], 69 | "metadata": { 70 | "kernelspec": { 71 | "display_name": "Python 3", 72 | "language": "python", 73 | "name": "python3" 74 | }, 75 | "language_info": { 76 | "codemirror_mode": { 77 | "name": "ipython", 78 | "version": 3 79 | }, 80 | "file_extension": ".py", 81 | "mimetype": "text/x-python", 82 | "name": "python", 83 | "nbconvert_exporter": "python", 84 | "pygments_lexer": "ipython3", 85 | "version": "3.8.1" 86 | }, 87 | "toc": { 88 | "base_numbering": 1, 89 | "nav_menu": {}, 90 | "number_sections": true, 91 | "sideBar": true, 92 | "skip_h1_title": false, 93 | "title_cell": "Table of Contents", 94 | "title_sidebar": "Contents", 95 | "toc_cell": false, 96 | "toc_position": {}, 97 | "toc_section_display": true, 98 | "toc_window_display": false 99 | } 100 | }, 101 | "nbformat": 4, 102 | "nbformat_minor": 5 103 | 
} 104 | -------------------------------------------------------------------------------- /code_snippets/data_science_tools/compare_2_datasets/compare_datasets.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "consolidated-rabbit", 7 | "metadata": { 8 | "ExecuteTime": { 9 | "end_time": "2021-06-20T18:58:20.385118Z", 10 | "start_time": "2021-06-20T18:58:14.166126Z" 11 | } 12 | }, 13 | "outputs": [], 14 | "source": [ 15 | "pip install sweetviz" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 3, 21 | "id": "japanese-observer", 22 | "metadata": { 23 | "ExecuteTime": { 24 | "end_time": "2021-06-20T18:58:32.833352Z", 25 | "start_time": "2021-06-20T18:58:30.121608Z" 26 | } 27 | }, 28 | "outputs": [ 29 | { 30 | "data": { 31 | "application/vnd.jupyter.widget-view+json": { 32 | "model_id": "7d5e393d9b13423d8134501d93ca1734", 33 | "version_major": 2, 34 | "version_minor": 0 35 | }, 36 | "text/plain": [ 37 | " | | [ 0%] 00:00 -> (? left)" 38 | ] 39 | }, 40 | "metadata": {}, 41 | "output_type": "display_data" 42 | }, 43 | { 44 | "name": "stdout", 45 | "output_type": "stream", 46 | "text": [ 47 | "Report SWEETVIZ_REPORT.html was generated! 
NOTEBOOK/COLAB USERS: the web browser MAY not pop up, regardless, the report IS saved in your notebook/colab files.\n" 48 | ] 49 | } 50 | ], 51 | "source": [ 52 | "from sklearn.datasets import load_iris\n", 53 | "from sklearn.model_selection import train_test_split\n", 54 | "import sweetviz as sv \n", 55 | "\n", 56 | "X, y = load_iris(return_X_y=True, as_frame=True)\n", 57 | "X_train, X_test, y_train, y_test = train_test_split(X, y)\n", 58 | "\n", 59 | "report = sv.compare([X_train, 'train data'], [X_test, 'test data'])\n", 60 | "report.show_html()" 61 | ] 62 | } 63 | ], 64 | "metadata": { 65 | "kernelspec": { 66 | "display_name": "Python 3", 67 | "language": "python", 68 | "name": "python3" 69 | }, 70 | "language_info": { 71 | "codemirror_mode": { 72 | "name": "ipython", 73 | "version": 3 74 | }, 75 | "file_extension": ".py", 76 | "mimetype": "text/x-python", 77 | "name": "python", 78 | "nbconvert_exporter": "python", 79 | "pygments_lexer": "ipython3", 80 | "version": "3.8.1" 81 | }, 82 | "toc": { 83 | "base_numbering": 1, 84 | "nav_menu": {}, 85 | "number_sections": true, 86 | "sideBar": true, 87 | "skip_h1_title": false, 88 | "title_cell": "Table of Contents", 89 | "title_sidebar": "Contents", 90 | "toc_cell": false, 91 | "toc_position": {}, 92 | "toc_section_display": true, 93 | "toc_window_display": false 94 | } 95 | }, 96 | "nbformat": 4, 97 | "nbformat_minor": 5 98 | } 99 | -------------------------------------------------------------------------------- /code_snippets/data_science_tools/mito_example/mito_example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 10, 6 | "id": "aa6d4b81", 7 | "metadata": { 8 | "ExecuteTime": { 9 | "end_time": "2021-09-03T13:53:01.448473Z", 10 | "start_time": "2021-09-03T13:53:01.428519Z" 11 | } 12 | }, 13 | "outputs": [], 14 | "source": [ 15 | "from sklearn.datasets import load_iris\n", 16 | "\n", 17 | "data = 
load_iris(as_frame=True)\n", 18 | "data['data'].to_csv('iris.csv')" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 3, 24 | "id": "184e9cd1", 25 | "metadata": { 26 | "ExecuteTime": { 27 | "end_time": "2021-09-03T13:53:10.302258Z", 28 | "start_time": "2021-09-03T13:53:10.146695Z" 29 | } 30 | }, 31 | "outputs": [ 32 | { 33 | "data": { 34 | "application/vnd.jupyter.widget-view+json": { 35 | "model_id": "b1dcfcb3c6844bbaa1861f5ff0a91e63", 36 | "version_major": 2, 37 | "version_minor": 0 38 | }, 39 | "text/plain": [ 40 | "MitoWidget(analysis_name='UUID-47d43efb-3aaf-40d0-8d93-8b00df165284', code_json='{\"imports\": \"from mitosheet i…" 41 | ] 42 | }, 43 | "metadata": {}, 44 | "output_type": "display_data" 45 | } 46 | ], 47 | "source": [ 48 | "import mitosheet\n", 49 | "mitosheet.sheet()" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "id": "ce01ae76-c0aa-4b49-839e-5511ae4dfd33", 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "id": "5892beb5-cc54-4fad-a25e-acb572b06b9f", 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "id": "8a39338b", 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [] 75 | } 76 | ], 77 | "metadata": { 78 | "kernelspec": { 79 | "display_name": "Python 3 (ipykernel)", 80 | "language": "python", 81 | "name": "python3" 82 | }, 83 | "language_info": { 84 | "codemirror_mode": { 85 | "name": "ipython", 86 | "version": 3 87 | }, 88 | "file_extension": ".py", 89 | "mimetype": "text/x-python", 90 | "name": "python", 91 | "nbconvert_exporter": "python", 92 | "pygments_lexer": "ipython3", 93 | "version": "3.8.10" 94 | }, 95 | "toc": { 96 | "base_numbering": 1, 97 | "nav_menu": {}, 98 | "number_sections": true, 99 | "sideBar": true, 100 | "skip_h1_title": false, 101 | "title_cell": "Table of Contents", 102 | "title_sidebar": 
"Contents", 103 | "toc_cell": false, 104 | "toc_position": {}, 105 | "toc_section_display": true, 106 | "toc_window_display": false 107 | } 108 | }, 109 | "nbformat": 4, 110 | "nbformat_minor": 5 111 | } 112 | -------------------------------------------------------------------------------- /code_snippets/cool_tools/interrogate_example.py: -------------------------------------------------------------------------------- 1 | class Math: 2 | def __init__(self, num) -> None: 3 | self.num = num 4 | 5 | def plus_two(self): 6 | """Add 2""" 7 | return self.num + 2 8 | 9 | def multiply_three(self): 10 | return self.num * 3 11 | 12 | 13 | """ 14 | $ interrogate -vv -i interrogate_example.py 15 | ======================================= Coverage for /Users/khuyen/Python-data-science-code-snippet/code_snippets/cool_tools/ ======================================== 16 | ------------------------------------------------------------------------- Detailed Coverage -------------------------------------------------------------------------- 17 | | Name | Status | 18 | |-------------------------------------------------------------------------------------------------|------------------------------------------------------------------| 19 | | interrogate_example.py (module) | MISSED | 20 | | MathOperation (L1) | MISSED | 21 | | MathOperation.plus_two (L5) | COVERED | 22 | | MathOperation.multiply_three (L9) | MISSED | 23 | |-------------------------------------------------------------------------------------------------|------------------------------------------------------------------| 24 | 25 | ------------------------------------------------------------------------------ Summary ------------------------------------------------------------------------------- 26 | | Name | Total | Miss | Cover | Cover% | 27 | |------------------------------------------------|----------------------------|---------------------------|----------------------------|-----------------------------| 28 | | 
interrogate_example.py | 4 | 3 | 1 | 25% | 29 | |------------------------------------------------|----------------------------|---------------------------|----------------------------|-----------------------------| 30 | | TOTAL | 4 | 3 | 1 | 25.0% | 31 | ----------------------------------------------------------- RESULT: FAILED (minimum: 80.0%, actual: 25.0%) ----------------------------------------------------------- 32 | """ 33 | -------------------------------------------------------------------------------- /code_snippets/data_science_tools/mito_example/iris.csv: -------------------------------------------------------------------------------- 1 | ,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm) 2 | 0,5.1,3.5,1.4,0.2 3 | 1,4.9,3.0,1.4,0.2 4 | 2,4.7,3.2,1.3,0.2 5 | 3,4.6,3.1,1.5,0.2 6 | 4,5.0,3.6,1.4,0.2 7 | 5,5.4,3.9,1.7,0.4 8 | 6,4.6,3.4,1.4,0.3 9 | 7,5.0,3.4,1.5,0.2 10 | 8,4.4,2.9,1.4,0.2 11 | 9,4.9,3.1,1.5,0.1 12 | 10,5.4,3.7,1.5,0.2 13 | 11,4.8,3.4,1.6,0.2 14 | 12,4.8,3.0,1.4,0.1 15 | 13,4.3,3.0,1.1,0.1 16 | 14,5.8,4.0,1.2,0.2 17 | 15,5.7,4.4,1.5,0.4 18 | 16,5.4,3.9,1.3,0.4 19 | 17,5.1,3.5,1.4,0.3 20 | 18,5.7,3.8,1.7,0.3 21 | 19,5.1,3.8,1.5,0.3 22 | 20,5.4,3.4,1.7,0.2 23 | 21,5.1,3.7,1.5,0.4 24 | 22,4.6,3.6,1.0,0.2 25 | 23,5.1,3.3,1.7,0.5 26 | 24,4.8,3.4,1.9,0.2 27 | 25,5.0,3.0,1.6,0.2 28 | 26,5.0,3.4,1.6,0.4 29 | 27,5.2,3.5,1.5,0.2 30 | 28,5.2,3.4,1.4,0.2 31 | 29,4.7,3.2,1.6,0.2 32 | 30,4.8,3.1,1.6,0.2 33 | 31,5.4,3.4,1.5,0.4 34 | 32,5.2,4.1,1.5,0.1 35 | 33,5.5,4.2,1.4,0.2 36 | 34,4.9,3.1,1.5,0.2 37 | 35,5.0,3.2,1.2,0.2 38 | 36,5.5,3.5,1.3,0.2 39 | 37,4.9,3.6,1.4,0.1 40 | 38,4.4,3.0,1.3,0.2 41 | 39,5.1,3.4,1.5,0.2 42 | 40,5.0,3.5,1.3,0.3 43 | 41,4.5,2.3,1.3,0.3 44 | 42,4.4,3.2,1.3,0.2 45 | 43,5.0,3.5,1.6,0.6 46 | 44,5.1,3.8,1.9,0.4 47 | 45,4.8,3.0,1.4,0.3 48 | 46,5.1,3.8,1.6,0.2 49 | 47,4.6,3.2,1.4,0.2 50 | 48,5.3,3.7,1.5,0.2 51 | 49,5.0,3.3,1.4,0.2 52 | 50,7.0,3.2,4.7,1.4 53 | 51,6.4,3.2,4.5,1.5 54 | 52,6.9,3.1,4.9,1.5 55 | 
53,5.5,2.3,4.0,1.3 56 | 54,6.5,2.8,4.6,1.5 57 | 55,5.7,2.8,4.5,1.3 58 | 56,6.3,3.3,4.7,1.6 59 | 57,4.9,2.4,3.3,1.0 60 | 58,6.6,2.9,4.6,1.3 61 | 59,5.2,2.7,3.9,1.4 62 | 60,5.0,2.0,3.5,1.0 63 | 61,5.9,3.0,4.2,1.5 64 | 62,6.0,2.2,4.0,1.0 65 | 63,6.1,2.9,4.7,1.4 66 | 64,5.6,2.9,3.6,1.3 67 | 65,6.7,3.1,4.4,1.4 68 | 66,5.6,3.0,4.5,1.5 69 | 67,5.8,2.7,4.1,1.0 70 | 68,6.2,2.2,4.5,1.5 71 | 69,5.6,2.5,3.9,1.1 72 | 70,5.9,3.2,4.8,1.8 73 | 71,6.1,2.8,4.0,1.3 74 | 72,6.3,2.5,4.9,1.5 75 | 73,6.1,2.8,4.7,1.2 76 | 74,6.4,2.9,4.3,1.3 77 | 75,6.6,3.0,4.4,1.4 78 | 76,6.8,2.8,4.8,1.4 79 | 77,6.7,3.0,5.0,1.7 80 | 78,6.0,2.9,4.5,1.5 81 | 79,5.7,2.6,3.5,1.0 82 | 80,5.5,2.4,3.8,1.1 83 | 81,5.5,2.4,3.7,1.0 84 | 82,5.8,2.7,3.9,1.2 85 | 83,6.0,2.7,5.1,1.6 86 | 84,5.4,3.0,4.5,1.5 87 | 85,6.0,3.4,4.5,1.6 88 | 86,6.7,3.1,4.7,1.5 89 | 87,6.3,2.3,4.4,1.3 90 | 88,5.6,3.0,4.1,1.3 91 | 89,5.5,2.5,4.0,1.3 92 | 90,5.5,2.6,4.4,1.2 93 | 91,6.1,3.0,4.6,1.4 94 | 92,5.8,2.6,4.0,1.2 95 | 93,5.0,2.3,3.3,1.0 96 | 94,5.6,2.7,4.2,1.3 97 | 95,5.7,3.0,4.2,1.2 98 | 96,5.7,2.9,4.2,1.3 99 | 97,6.2,2.9,4.3,1.3 100 | 98,5.1,2.5,3.0,1.1 101 | 99,5.7,2.8,4.1,1.3 102 | 100,6.3,3.3,6.0,2.5 103 | 101,5.8,2.7,5.1,1.9 104 | 102,7.1,3.0,5.9,2.1 105 | 103,6.3,2.9,5.6,1.8 106 | 104,6.5,3.0,5.8,2.2 107 | 105,7.6,3.0,6.6,2.1 108 | 106,4.9,2.5,4.5,1.7 109 | 107,7.3,2.9,6.3,1.8 110 | 108,6.7,2.5,5.8,1.8 111 | 109,7.2,3.6,6.1,2.5 112 | 110,6.5,3.2,5.1,2.0 113 | 111,6.4,2.7,5.3,1.9 114 | 112,6.8,3.0,5.5,2.1 115 | 113,5.7,2.5,5.0,2.0 116 | 114,5.8,2.8,5.1,2.4 117 | 115,6.4,3.2,5.3,2.3 118 | 116,6.5,3.0,5.5,1.8 119 | 117,7.7,3.8,6.7,2.2 120 | 118,7.7,2.6,6.9,2.3 121 | 119,6.0,2.2,5.0,1.5 122 | 120,6.9,3.2,5.7,2.3 123 | 121,5.6,2.8,4.9,2.0 124 | 122,7.7,2.8,6.7,2.0 125 | 123,6.3,2.7,4.9,1.8 126 | 124,6.7,3.3,5.7,2.1 127 | 125,7.2,3.2,6.0,1.8 128 | 126,6.2,2.8,4.8,1.8 129 | 127,6.1,3.0,4.9,1.8 130 | 128,6.4,2.8,5.6,2.1 131 | 129,7.2,3.0,5.8,1.6 132 | 130,7.4,2.8,6.1,1.9 133 | 131,7.9,3.8,6.4,2.0 134 | 132,6.4,2.8,5.6,2.2 135 | 
133,6.3,2.8,5.1,1.5 136 | 134,6.1,2.6,5.6,1.4 137 | 135,7.7,3.0,6.1,2.3 138 | 136,6.3,3.4,5.6,2.4 139 | 137,6.4,3.1,5.5,1.8 140 | 138,6.0,3.0,4.8,1.8 141 | 139,6.9,3.1,5.4,2.1 142 | 140,6.7,3.1,5.6,2.4 143 | 141,6.9,3.1,5.1,2.3 144 | 142,5.8,2.7,5.1,1.9 145 | 143,6.8,3.2,5.9,2.3 146 | 144,6.7,3.3,5.7,2.5 147 | 145,6.7,3.0,5.2,2.3 148 | 146,6.3,2.5,5.0,1.9 149 | 147,6.5,3.0,5.2,2.0 150 | 148,6.2,3.4,5.4,2.3 151 | 149,5.9,3.0,5.1,1.8 152 | -------------------------------------------------------------------------------- /code_snippets/notebook/watermark_example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "coated-cyprus", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "pip install watermark " 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "id": "emerging-party", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "%load_ext watermark" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 12, 26 | "id": "enabling-applicant", 27 | "metadata": { 28 | "ExecuteTime": { 29 | "end_time": "2021-07-07T13:14:52.253902Z", 30 | "start_time": "2021-07-07T13:14:52.222631Z" 31 | } 32 | }, 33 | "outputs": [ 34 | { 35 | "name": "stdout", 36 | "output_type": "stream", 37 | "text": [ 38 | "Last updated: 2021-07-07T08:14:52.226814-05:00\n", 39 | "\n", 40 | "Python implementation: CPython\n", 41 | "Python version : 3.8.1\n", 42 | "IPython version : 7.24.0\n", 43 | "\n", 44 | "Compiler : GCC 7.5.0\n", 45 | "OS : Linux\n", 46 | "Release : 5.4.0-77-generic\n", 47 | "Machine : x86_64\n", 48 | "Processor : x86_64\n", 49 | "CPU cores : 16\n", 50 | "Architecture: 64bit\n", 51 | "\n" 52 | ] 53 | } 54 | ], 55 | "source": [ 56 | "%watermark" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 9, 62 | "id": "anonymous-piano", 63 | "metadata": { 64 | "ExecuteTime": { 65 | 
"end_time": "2021-07-07T13:09:37.538488Z", 66 | "start_time": "2021-07-07T13:09:37.086018Z" 67 | } 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "import numpy as np \n", 72 | "import pandas as pd \n", 73 | "import sklearn" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 10, 79 | "id": "concrete-italy", 80 | "metadata": { 81 | "ExecuteTime": { 82 | "end_time": "2021-07-07T13:09:37.571037Z", 83 | "start_time": "2021-07-07T13:09:37.539577Z" 84 | } 85 | }, 86 | "outputs": [ 87 | { 88 | "name": "stdout", 89 | "output_type": "stream", 90 | "text": [ 91 | "json : 2.0.9\n", 92 | "pandas : 1.2.3\n", 93 | "autopep8: 1.5.7\n", 94 | "sklearn : 0.0\n", 95 | "numpy : 1.21.0\n", 96 | "isort : 5.8.0\n", 97 | "\n" 98 | ] 99 | } 100 | ], 101 | "source": [ 102 | "%watermark --iversions # Show the versions of libraries being used" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "id": "treated-cleanup", 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [] 112 | } 113 | ], 114 | "metadata": { 115 | "kernelspec": { 116 | "display_name": "Python 3", 117 | "language": "python", 118 | "name": "python3" 119 | }, 120 | "language_info": { 121 | "codemirror_mode": { 122 | "name": "ipython", 123 | "version": 3 124 | }, 125 | "file_extension": ".py", 126 | "mimetype": "text/x-python", 127 | "name": "python", 128 | "nbconvert_exporter": "python", 129 | "pygments_lexer": "ipython3", 130 | "version": "3.8.1" 131 | }, 132 | "toc": { 133 | "base_numbering": 1, 134 | "nav_menu": {}, 135 | "number_sections": true, 136 | "sideBar": true, 137 | "skip_h1_title": false, 138 | "title_cell": "Table of Contents", 139 | "title_sidebar": "Contents", 140 | "toc_cell": false, 141 | "toc_position": {}, 142 | "toc_section_display": true, 143 | "toc_window_display": false 144 | } 145 | }, 146 | "nbformat": 4, 147 | "nbformat_minor": 5 148 | } 149 | -------------------------------------------------------------------------------- 
/code_snippets/pandas/highlight_pandas.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "id": "therapeutic-binary", 7 | "metadata": { 8 | "ExecuteTime": { 9 | "end_time": "2021-06-20T16:52:56.992148Z", 10 | "start_time": "2021-06-20T16:52:56.734486Z" 11 | } 12 | }, 13 | "outputs": [], 14 | "source": [ 15 | "import pandas as pd " 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 7, 21 | "id": "modern-reducing", 22 | "metadata": { 23 | "ExecuteTime": { 24 | "end_time": "2021-06-20T16:54:05.689246Z", 25 | "start_time": "2021-06-20T16:54:05.676710Z" 26 | } 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "df = pd.DataFrame({'col1': [-5, -2, 1, 4], 'col2': [2, 3, -1, 4]})" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 8, 36 | "id": "mighty-edwards", 37 | "metadata": { 38 | "ExecuteTime": { 39 | "end_time": "2021-06-20T16:54:06.012566Z", 40 | "start_time": "2021-06-20T16:54:06.005511Z" 41 | } 42 | }, 43 | "outputs": [], 44 | "source": [ 45 | "def highlight_number(row):\n", 46 | " return [\n", 47 | " 'background-color: red; color: white' if cell <= 0\n", 48 | " else 'background-color: green; color: white'\n", 49 | " for cell in row\n", 50 | " ]" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 9, 56 | "id": "authentic-crowd", 57 | "metadata": { 58 | "ExecuteTime": { 59 | "end_time": "2021-06-20T16:54:06.443918Z", 60 | "start_time": "2021-06-20T16:54:06.419712Z" 61 | } 62 | }, 63 | "outputs": [ 64 | { 65 | "data": { 66 | "text/html": [ 67 | "\n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | "
col1 col2
0-52
1-23
21-1
344
" 96 | ], 97 | "text/plain": [ 98 | "" 99 | ] 100 | }, 101 | "execution_count": 9, 102 | "metadata": {}, 103 | "output_type": "execute_result" 104 | } 105 | ], 106 | "source": [ 107 | "df.style.apply(highlight_number)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "id": "durable-speed", 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [] 117 | } 118 | ], 119 | "metadata": { 120 | "kernelspec": { 121 | "display_name": "Python 3", 122 | "language": "python", 123 | "name": "python3" 124 | }, 125 | "language_info": { 126 | "codemirror_mode": { 127 | "name": "ipython", 128 | "version": 3 129 | }, 130 | "file_extension": ".py", 131 | "mimetype": "text/x-python", 132 | "name": "python", 133 | "nbconvert_exporter": "python", 134 | "pygments_lexer": "ipython3", 135 | "version": "3.8.1" 136 | }, 137 | "toc": { 138 | "base_numbering": 1, 139 | "nav_menu": {}, 140 | "number_sections": true, 141 | "sideBar": true, 142 | "skip_h1_title": false, 143 | "title_cell": "Table of Contents", 144 | "title_sidebar": "Contents", 145 | "toc_cell": false, 146 | "toc_position": {}, 147 | "toc_section_display": true, 148 | "toc_window_display": false 149 | } 150 | }, 151 | "nbformat": 4, 152 | "nbformat_minor": 5 153 | } 154 | -------------------------------------------------------------------------------- /code_snippets/data_science_tools/newspaper3k.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "unnecessary-gilbert", 7 | "metadata": { 8 | "ExecuteTime": { 9 | "end_time": "2021-07-10T13:22:43.082803Z", 10 | "start_time": "2021-07-10T13:22:28.715292Z" 11 | } 12 | }, 13 | "outputs": [], 14 | "source": [ 15 | "pip install newspaper3k" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 3, 21 | "id": "bulgarian-austria", 22 | "metadata": { 23 | "ExecuteTime": { 24 | "end_time": 
"2021-07-10T13:23:21.852155Z", 25 | "start_time": "2021-07-10T13:23:18.996409Z" 26 | } 27 | }, 28 | "outputs": [ 29 | { 30 | "name": "stderr", 31 | "output_type": "stream", 32 | "text": [ 33 | "[nltk_data] Downloading package punkt to /home/user/nltk_data...\n", 34 | "[nltk_data] Unzipping tokenizers/punkt.zip.\n" 35 | ] 36 | }, 37 | { 38 | "data": { 39 | "text/plain": [ 40 | "True" 41 | ] 42 | }, 43 | "execution_count": 3, 44 | "metadata": {}, 45 | "output_type": "execute_result" 46 | } 47 | ], 48 | "source": [ 49 | "from newspaper import Article\n", 50 | "import nltk\n", 51 | "nltk.download('punkt')" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 4, 57 | "id": "municipal-strain", 58 | "metadata": { 59 | "ExecuteTime": { 60 | "end_time": "2021-07-10T13:24:11.568005Z", 61 | "start_time": "2021-07-10T13:24:11.255995Z" 62 | } 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "url = \"https://www.dataquest.io/blog/learn-data-science/\"\n", 67 | "article = Article(url)\n", 68 | "article.download()\n", 69 | "article.parse()" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 6, 75 | "id": "roman-postage", 76 | "metadata": { 77 | "ExecuteTime": { 78 | "end_time": "2021-07-10T13:24:21.243417Z", 79 | "start_time": "2021-07-10T13:24:21.240287Z" 80 | } 81 | }, 82 | "outputs": [ 83 | { 84 | "data": { 85 | "text/plain": [ 86 | "'How to Learn Data Science (A step-by-step guide)'" 87 | ] 88 | }, 89 | "execution_count": 6, 90 | "metadata": {}, 91 | "output_type": "execute_result" 92 | } 93 | ], 94 | "source": [ 95 | "article.title" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 7, 101 | "id": "brave-sussex", 102 | "metadata": { 103 | "ExecuteTime": { 104 | "end_time": "2021-07-10T13:24:27.866249Z", 105 | "start_time": "2021-07-10T13:24:27.856273Z" 106 | } 107 | }, 108 | "outputs": [ 109 | { 110 | "data": { 111 | "text/plain": [ 112 | "datetime.datetime(2020, 5, 4, 7, 1, tzinfo=tzutc())" 113 | ] 114 | }, 115 | 
"execution_count": 7, 116 | "metadata": {}, 117 | "output_type": "execute_result" 118 | } 119 | ], 120 | "source": [ 121 | "article.publish_date" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 8, 127 | "id": "behind-familiar", 128 | "metadata": { 129 | "ExecuteTime": { 130 | "end_time": "2021-07-10T13:24:34.598082Z", 131 | "start_time": "2021-07-10T13:24:34.594198Z" 132 | } 133 | }, 134 | "outputs": [ 135 | { 136 | "data": { 137 | "text/plain": [ 138 | "'https://www.dataquest.io/wp-content/uploads/2020/05/learn-data-science.jpg'" 139 | ] 140 | }, 141 | "execution_count": 8, 142 | "metadata": {}, 143 | "output_type": "execute_result" 144 | } 145 | ], 146 | "source": [ 147 | "article.top_image" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 9, 153 | "id": "formed-declaration", 154 | "metadata": { 155 | "ExecuteTime": { 156 | "end_time": "2021-07-10T13:24:41.152427Z", 157 | "start_time": "2021-07-10T13:24:41.112948Z" 158 | } 159 | }, 160 | "outputs": [], 161 | "source": [ 162 | "article.nlp()" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 12, 168 | "id": "olympic-conditioning", 169 | "metadata": { 170 | "ExecuteTime": { 171 | "end_time": "2021-07-10T13:33:00.606867Z", 172 | "start_time": "2021-07-10T13:33:00.602999Z" 173 | } 174 | }, 175 | "outputs": [ 176 | { 177 | "data": { 178 | "text/plain": [ 179 | "'How to Learn Data Science (A step-by-step guide)There’s no doubt about it: data scientists are in high demand.\\nHow to Learn Data ScienceSo how do you start to learn data science?\\nIf you want to learn data science or just pick up some data science skills, your first goal should be to learn to love data.\\nRather, consider it as a rough set of guidelines to follow as you learn data science on your own path.\\nI personally believe that anyone can learn data science if they approach it with the right frame of mind.'" 180 | ] 181 | }, 182 | "execution_count": 12, 183 | 
"metadata": {}, 184 | "output_type": "execute_result" 185 | } 186 | ], 187 | "source": [ 188 | "article.summary" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 10, 194 | "id": "gentle-princeton", 195 | "metadata": { 196 | "ExecuteTime": { 197 | "end_time": "2021-07-10T13:24:46.465668Z", 198 | "start_time": "2021-07-10T13:24:46.462253Z" 199 | } 200 | }, 201 | "outputs": [ 202 | { 203 | "data": { 204 | "text/plain": [ 205 | "['learn',\n", 206 | " 'skills',\n", 207 | " 'work',\n", 208 | " 'stepbystep',\n", 209 | " 'youll',\n", 210 | " 'guide',\n", 211 | " 'learning',\n", 212 | " 'need',\n", 213 | " 'science',\n", 214 | " 'data',\n", 215 | " 'youre',\n", 216 | " 'scientists']" 217 | ] 218 | }, 219 | "execution_count": 10, 220 | "metadata": {}, 221 | "output_type": "execute_result" 222 | } 223 | ], 224 | "source": [ 225 | "article.keywords" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": null, 231 | "id": "accompanied-strengthening", 232 | "metadata": {}, 233 | "outputs": [], 234 | "source": [] 235 | } 236 | ], 237 | "metadata": { 238 | "kernelspec": { 239 | "display_name": "Python 3", 240 | "language": "python", 241 | "name": "python3" 242 | }, 243 | "language_info": { 244 | "codemirror_mode": { 245 | "name": "ipython", 246 | "version": 3 247 | }, 248 | "file_extension": ".py", 249 | "mimetype": "text/x-python", 250 | "name": "python", 251 | "nbconvert_exporter": "python", 252 | "pygments_lexer": "ipython3", 253 | "version": "3.8.1" 254 | }, 255 | "toc": { 256 | "base_numbering": 1, 257 | "nav_menu": {}, 258 | "number_sections": true, 259 | "sideBar": true, 260 | "skip_h1_title": false, 261 | "title_cell": "Table of Contents", 262 | "title_sidebar": "Contents", 263 | "toc_cell": false, 264 | "toc_position": {}, 265 | "toc_section_display": true, 266 | "toc_window_display": false 267 | } 268 | }, 269 | "nbformat": 4, 270 | "nbformat_minor": 5 271 | } 272 | 
-------------------------------------------------------------------------------- /code_snippets/data_science_tools/folium_example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "juvenile-consortium", 7 | "metadata": { 8 | "ExecuteTime": { 9 | "end_time": "2021-06-20T15:40:03.652006Z", 10 | "start_time": "2021-06-20T15:40:00.949060Z" 11 | } 12 | }, 13 | "outputs": [], 14 | "source": [ 15 | "pip install folium " 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 3, 21 | "id": "assigned-conservative", 22 | "metadata": { 23 | "ExecuteTime": { 24 | "end_time": "2021-06-20T15:41:42.638978Z", 25 | "start_time": "2021-06-20T15:41:42.627499Z" 26 | } 27 | }, 28 | "outputs": [ 29 | { 30 | "data": { 31 | "text/html": [ 32 | "
Make this Notebook Trusted to load map: File -> Trust Notebook
" 33 | ], 34 | "text/plain": [ 35 | "" 36 | ] 37 | }, 38 | "execution_count": 3, 39 | "metadata": {}, 40 | "output_type": "execute_result" 41 | } 42 | ], 43 | "source": [ 44 | "import folium\n", 45 | "m = folium.Map(location=[45.5236, -122.6750])\n", 46 | "\n", 47 | "tooltip = 'Click me!'\n", 48 | "folium.Marker([45.3288, -121.6625], popup='Mt. Hood Meadows',\n", 49 | " tooltip=tooltip).add_to(m)\n", 50 | "m " 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "id": "occasional-sudan", 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [] 60 | } 61 | ], 62 | "metadata": { 63 | "kernelspec": { 64 | "display_name": "Python 3", 65 | "language": "python", 66 | "name": "python3" 67 | }, 68 | "language_info": { 69 | "codemirror_mode": { 70 | "name": "ipython", 71 | "version": 3 72 | }, 73 | "file_extension": ".py", 74 | "mimetype": "text/x-python", 75 | "name": "python", 76 | "nbconvert_exporter": "python", 77 | "pygments_lexer": "ipython3", 78 | "version": "3.8.1" 79 | }, 80 | "toc": { 81 | "base_numbering": 1, 82 | "nav_menu": {}, 83 | "number_sections": true, 84 | "sideBar": true, 85 | "skip_h1_title": false, 86 | "title_cell": "Table of Contents", 87 | "title_sidebar": "Contents", 88 | "toc_cell": false, 89 | "toc_position": {}, 90 | "toc_section_display": true, 91 | "toc_window_display": false 92 | } 93 | }, 94 | "nbformat": 4, 95 | "nbformat_minor": 5 96 | } 97 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python and Data Science Code Snippets 2 | Source code of Python and data science snippets posted daily at [Data Science Simplified](https://mathdatasimplified.com/). You can receive these daily tips in your mailbox for free by [subscribing to the website](https://mathdatasimplified.com/). 
3 | 4 | To get access to these daily tips on the command line, install [python-snippet](https://github.com/khuyentran1401/python_snippet). 5 | 6 | # Contents 7 | * [Python Built-in Methods](#python) 8 | * [Pandas](#pandas) 9 | * [Numpy](#numpy) 10 | * [Data Science Tools](#data-science-tools) 11 | * [Terminal](#terminal) 12 | * [Cool Tools](#cool-tools) 13 | * [Jupyter Notebook](#jupyter-notebook) 14 | 15 | 16 |

Python Built-in Methods

17 | 18 | ### Number 19 | 20 | | Title | Explanation | Code | 21 | | ------------- |:-------------:| :-----:| 22 | | Get Multiples of a Number Using Modulus | [link](https://mathdatasimplified.com/2021/04/22/get-multiples-of-a-numbers-using-modulus/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/multiples_of_a_number.py) 23 | | fractions: Get Numerical Results in Fractions instead of Decimals | [link](https://mathdatasimplified.com/2021/02/27/fractions-get-numerical-results-in-fractions-instead-of-decimals/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/fractions_example.py) 24 | | How to Use Underscores to Format Large Numbers in Python | [link](https://mathdatasimplified.com/2021/01/12/how-to-use-underscores-to-format-large-numbers-in-python/) |[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/underscore_large_number.py) 25 | | Confirm whether a variable is a number | [link](https://mathdatasimplified.com/2020/11/23/confirm-whether-a-variable-is-a-number/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/check_if_number.py) 26 | | Get a Division, Floor Division, And The Remainder of a Division in Python | [link](https://mathdatasimplified.com/2021/08/31/get-a-division-floor-division-and-the-remainder-of-a-division-in-python/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/python/division_operators.py) 27 | 28 | ### Boolean 29 | 30 | | Title | Explanation | Code | 31 | | ------------- |:-------------:| :-----:| 32 | | Boolean Operators: Connect Two Boolean Expressions into One Expression | [link](https://mathdatasimplified.com/2021/05/15/boolean-operators-connect-two-boolean-expressions-into-one-expression/) | 
[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/boolean_operators.py) 33 | 34 | ### String 35 | 36 | | Title | Explanation | Code | 37 | | ------------- |:-------------:| :-----:| 38 | | `__str__` and `__repr__`: Create a String Representation of a Python Object | [link](https://mathdatasimplified.com/2021/05/11/__str__-and-__repr__-create-a-string-representation-of-a-python-bbject/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/__str__and__repr.py) 39 | | String find: Find the Index of a Substring in a Python String | [link](https://mathdatasimplified.com/2021/05/01/string-find-find-the-index-of-a-substring-in-a-python-string/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/string_find.py) 40 | | eval: Turn a Python String into a Variable or Function | [link](https://mathdatasimplified.com/2021/03/13/eval-turn-a-python-string-into-a-variable-or-function/) |[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/eval_example.py) 41 | | re.sub: Replace One String with Another String Using Regular Expression | [link](https://mathdatasimplified.com/2021/03/07/re-sub-replace-one-string-with-another-string-using-regular-expression/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/re_sub_example.py) 42 | 43 | ### List 44 | 45 | | Title | Explanation | Code | 46 | | ------------- |:-------------:| :-----:| 47 | | any: Check if Any Element of an Iterable is True | [link](https://mathdatasimplified.com/2021/06/01/any-check-if-any-element-of-an-iterable-is-true/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/any_example.py) 48 | | Extended Iterable Unpacking: Ignore Multiple Values when Unpacking a Python Iterable | 
[link](https://mathdatasimplified.com/2021/05/03/extended-iterable-unpacking-ignore-multiple-values-when-unpacking-a-python-iterable/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/extended_iterable_unpacking.py) 49 | | How to Unpack Iterables in Python | [link](https://mathdatasimplified.com/2021/04/07/how-to-unpack-iterables-in-python/) |[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/unpack_iterables.py) 50 | | random.choice: Get a Randomly Selected Element from a Python List | [link](https://mathdatasimplified.com/2021/02/28/random-choice-get-a-randomly-selected-element-from-a-python-list/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/random_choice.py) 51 | | random.sample: Get n Random Elements From a List | [link](https://mathdatasimplified.com/2021/09/09/random-sample-get-n-random-elements-from-a-list/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/python/random_sample.py) 52 | | filter: Get the Elements of an Iterable that a Function Returns True | [link](https://mathdatasimplified.com/2021/06/11/filter-get-the-elements-of-an-iterable-that-a-function-returns-true/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/filter_example.py) 53 | | heapq: Find n Max Values of a Python List | [link](https://mathdatasimplified.com/2021/03/28/heapq-find-n-max-values-of-a-python-list/) |[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/heapq_example.py) 54 | | join method: Turn an Iterable into a Python String | [link](https://mathdatasimplified.com/2021/06/17/join-method-turn-an-iterable-to-a-python-string/) | 
[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/join_list.py) 55 | | Zip: Associate Elements from Two Iterators based on the Order | [link](https://mathdatasimplified.com/2021/02/05/zip-associate-elements-from-two-iterators-based-on-the-order/) |[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/zip_example.py) 56 | | collections.Counter: Count the Occurrences of Items in a List |[link](https://mathdatasimplified.com/2021/02/02/collections-counter-count-the-occurrences-of-items-in-a-list/) |[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/collections_counter.py) 57 | | Zip Function: Create Pairs of Elements from Two Lists in Python | [link](https://mathdatasimplified.com/2021/01/11/zip-function-create-pairs-of-elements-from-two-lists-in-python/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/zip_function.py) 58 | | Stop using = operator to create a copy of a Python list. 
Use copy method instead | [link](https://mathdatasimplified.com/2021/01/09/stop-using-operator-to-create-a-copy-of-a-python-list-use-copy-method-instead/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/copy_method.py) 59 | | itertools.combinations: A better way to iterate through a pair of values in a Python list | [link](https://mathdatasimplified.com/2020/12/12/itertools-combinations-a-better-way-to-iterate-through-a-pair-of-values-in-a-python-list/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/itertools_combinations_example.py) 60 | | itertools.product: Nested For-Loops in a Generator Expression | [link](https://mathdatasimplified.com/2021/08/03/itertools-product-nested-for-loops-in-a-generator-expression/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/python/itertools_examples.py) 61 | | itertools.islice: Get Items From an Iterable That are Within a Certain Range With a Specific Incrementation | [link](https://mathdatasimplified.com/2021/09/07/itertools-islice-get-items-from-an-iterable-that-are-within-a-certain-range-with-a-specific-incrementation/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/python/itertools_islice.py) 62 | | Enumerate | [link](https://mathdatasimplified.com/2020/11/23/enumerate/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/enumerate_example.py) 63 | | set.intersection: Find the Intersection Between 2 Sets | [link](https://mathdatasimplified.com/2021/06/29/set-intersection-find-the-intersection-between-2-sets/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/python/set_intersection.py) 64 | | Set Difference: Find the Difference Between 2 Sets | 
[link](https://mathdatasimplified.com/2021/08/26/set-difference-find-the-difference-between-2-sets/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/python/set_difference.py) 65 | | Difference between list append and list extend | [link](https://mathdatasimplified.com/2021/07/27/difference-between-list-append-and-list-extend/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/python/list_extend.py) 66 | | map method: Apply a Function to Each Item of an Iterable | [link](https://mathdatasimplified.com/2021/08/19/map-method-apply-a-function-to-each-item-of-an-iterable/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/python/list_map.py) 67 | | Why Should you Rewrite a For-Loop as a List Comprehension? | [link](https://mathdatasimplified.com/2021/08/17/why-should-you-rewrite-a-for-loop-as-a-list-comprehension/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/python/list_comprehension.py) 68 | 69 | ### Tuple 70 | 71 | | Title | Explanation | Code | 72 | | ------------- |:-------------:| :-----:| 73 | | namedtuple: A Lightweight Python Structure to Manage your Data | [link](https://mathdatasimplified.com/2021/02/22/namedtuple-a-lightweight-python-structure-to-mange-your-data/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/namedtuple_example.py) 74 | | slice: Make your Indices more Readable by Naming your Slice | [link](https://mathdatasimplified.com/2021/02/16/slice-make-your-indices-more-readable-by-naming-your-slice/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/slice_example.py) 75 | 76 | ### Dictionary 77 | 78 | | Title | Explanation | Code | 79 | | ------------- |:-------------:| :-----:| 80 | | Defaultdict: Return a default value 
when a key is not available | [link](https://mathdatasimplified.com/2020/12/09/how-to-return-a-default-value-when-a-key-is-not-in-python-dictionary/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/collections_defaultdict.py) 81 | | Defaultdict: Create a Dictionary with Values that are List | [link](https://mathdatasimplified.com/2021/07/22/defaultdict-create-a-dictionary-with-the-values-that-are-list/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/python/defaultdict_example.py) 82 | | Ordered dictionary in Python | [link](https://mathdatasimplified.com/2020/11/23/ordered-dictionary-in-python/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/collections_ordereddict.py) 83 | 84 | ### Datetime 85 | 86 | | Title | Explanation | Code | 87 | | ------------- |:-------------:| :-----:| 88 | | datetime + timedelta: Calculate End DateTime based on Start DateTime and Duration | [link](https://mathdatasimplified.com/2021/03/04/datetime-timedelta-calculate-end-datetime-based-on-start-datetime-and-duration/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/datetime_timedelta.py) 89 | | Use Dates in a Month as the Feature | [link](https://mathdatasimplified.com/2020/11/23/use-dates-in-a-month-as-the-feature/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/dates_in_month_as_feature.py) 90 | 91 | ### Function 92 | 93 | | Title | Explanation | Code | 94 | | ------------- |:-------------:| :-----:| 95 | | *iterator: Pass Values of an Iterator to a Function | [link](https://mathdatasimplified.com/2021/05/05/iterator-pass-values-of-an-iterator-to-a-function/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/args_example.py) 96 | | 
Use Python Built-in Functions to Speed your Code | [link](https://mathdatasimplified.com/2021/01/29/use-python-built-in-functions-to-speed-your-code/) |[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/built_in_functions_speed.py) 97 | | **kwargs: Pass multiple arguments to a function in Python |[link](https://mathdatasimplified.com/2020/12/26/kwargs-pass-multiple-arguments-to-a-function-in-python/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/kwargs.py) 98 | | Return Multiple Values from a Function Using Python Dictionary | [link](https://mathdatasimplified.com/2020/12/11/return-multiple-values-from-a-function-using-python-dictionary/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/return_multiple_values_with_dictionary.py) 99 | | Decorator in Python| [link](https://mathdatasimplified.com/2020/11/25/decorator-in-python/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/decorator_example.py) 100 | | functools.partial: Generate a New Function with Fewer Arguments | [link](https://mathdatasimplified.com/2021/07/06/functools-partial-generate-a-new-function-with-fewer-arguments/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/python/partial_function.py) 101 | | singledispatch: Call Another Function Based on the Type of the Current Function’s Argument | [link](https://mathdatasimplified.com/2021/09/02/singledispatch-call-another-function-based-on-the-type-of-the-current-functions-argument/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/python/singledispatch_example.py) 102 | 103 | ### Classes 104 | 105 | | Title | Explanation | Code | 106 | | ------------- |:-------------:| :-----:| 107 | | Abstract Classes: Declare 
Methods without Implementation | [link](https://mathdatasimplified.com/2021/06/08/abstract-classes-declare-methods-without-implementation/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/abc_example.py) | 108 | | classmethod: What is it and When to Use it | [link](https://mathdatasimplified.com/2021/04/24/classmethod-what-is-it-and-when-to-use-it/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/classmethod_example.py) 109 | | getattr: a Better Way to Get the Attribute of a Class | [link](https://mathdatasimplified.com/2021/02/23/getattr-a-better-way-to-get-the-attribute-of-a-class/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/getattr_example.py) 110 | | `__call__`: You can Call your Class Instance like a Function. Here is how | [link](https://mathdatasimplified.com/2021/01/22/__call__-you-can-call-your-class-instance-like-a-function-here-is-how/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/__call__example.py) 111 | | Static method: use the function without adding the attributes required for a new instance | [link](https://mathdatasimplified.com/2020/11/23/static-method-use-the-function-without-adding-the-attributes-required-for-a-new-instance/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/staticmethod_example.py) 112 | | Property Decorator: A Pythonic Way to Use Getters and Setters | [link](https://mathdatasimplified.com/2021/07/01/property-decorator-a-pythonic-way-to-use-getters-and-setters/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/python/property_decorator.py) 113 | 114 | ### Files 115 | 116 | | Title | Explanation | Code | 117 | | ------------- |:-------------:| :-----:| 118 | | Shutil: 
Move Files in Python | [link](https://mathdatasimplified.com/2021/06/03/shutil-move-files-in-python/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/move_files/move_files.py) 119 | | pathlib.Path | [link](https://mathdatasimplified.com/2020/11/23/pathlith-path/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/pathlib_path) 120 | | pathlib: Create, Write, and Rename Files in One Line of Code | [link](https://mathdatasimplified.com/2021/02/14/pathlib-create-write-and-rename-files-in-one-line-of-code/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/pathlib_write) 121 | | Pathlib: Iterate Over All Files that End with ‘.csv’ in a Directory | [link](https://mathdatasimplified.com/2020/12/31/pathlib-iterate-over-all-files-that-end-with-csv-in-a-directory/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/pathlib_iterate_files_end_with) 122 | | Path.parents: Get the Parent Directory of a File | [link](https://mathdatasimplified.com/2021/06/24/path-parents-get-the-parent-directory-of-a-file/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/path_parents) 123 | | How to Improve the Readability of your JSON file using Indent| [link](https://mathdatasimplified.com/2021/04/27/how-to-improve-the-readability-of-your-json-file-using-indent/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/improve_json_readability.py) 124 | | `__main__.py`: Run a Directory like a Main Script | [link](https://mathdatasimplified.com/2021/03/15/__main__-py-run-a-directory-like-a-main-script/) |[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/main_example) 125 | 126 | ### Error
handling 127 | 128 | | Title | Explanation | Code | 129 | | ------------- |:-------------:| :-----:| 130 | | Assert in Python: Output a Customized Message When the Assertion Fails | [link](https://mathdatasimplified.com/2021/04/13/assert-in-python-output-a-customized-message-when-the-assertion-fails/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/assert_customize_message.py) 131 | | warnings: Ignore Warnings when Running Python Code | [link](https://mathdatasimplified.com/2021/03/11/warnings-ignore-warnings-when-running-python-code/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/warnings_example.py) 132 | 133 | ### Interact with Terminal 134 | 135 | | Title | Explanation | Code | 136 | | ------------- |:-------------:| :-----:| 137 | | How to Execute Shell Commands in a Python Script | [link](https://mathdatasimplified.com/2021/04/10/how-to-execute-shell-commands-in-a-python-script/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/os_system.py) 138 | | argparse: Python Library to Parse Arguments from Command Line | [link](https://mathdatasimplified.com/2020/12/23/argparse-python-library-to-parse-arguments-from-command-line/)| [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/argparse_example.py) 139 | 140 | ### Best Practices 141 | 142 | | Title | Explanation | Code | 143 | | ------------- |:-------------:| :-----:| 144 | | Stop Writing Code Comments. 
Use Meaningful Names Instead | [link](https://mathdatasimplified.com/2021/01/14/stop-writing-code-comments-use-meaningful-names-instead/) 145 | | Underscore(_): Ignore values that will not be used | [link](https://mathdatasimplified.com/2020/12/25/underscore_-ignore-values-that-will-not-be-used/)| [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/ignore_variables.py) 146 | | Underscore “_”: Ignore the index in Python for loops | [link](https://mathdatasimplified.com/2020/12/20/underscore-_-ignore-the-index-in-python-for-loops/)| [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/underscore_for_loop.py) 147 | | Save Immediate Output when an Error Occurs | [link](https://mathdatasimplified.com/2020/12/10/save-immediate-output-when-an-error-occurs/) 148 | | Print error without stopping the for loop in Python | [link](https://mathdatasimplified.com/2020/12/06/print-error-without-stopping-the-for-loop-in-python/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/print_error.py) 149 | | Python Pass Statement | [link](https://mathdatasimplified.com/2020/12/02/python-pass-statement/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/pass_statement.py) 150 | | Type hint in Python 3.9 | [link](https://mathdatasimplified.com/2020/11/23/type-hint-in-python-3-9/) 151 | 152 | ### Code Speed 153 | 154 | | Title | Explanation | Code | 155 | | ------------- |:-------------:| :-----:| 156 | | Concurrently execute tasks on separate CPUs | [link](https://mathdatasimplified.com/2020/11/23/concurrently-execute-tasks-on-separate-cpus/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/multiprocessing_example.py) 157 | | Compare the execution time between 2 functions 
|[link](https://mathdatasimplified.com/2020/11/23/compare-the-execution-time-between-2-functions/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/compare_execution_time.py) 158 | 159 | 160 |

Pandas

161 | 162 | ### Change Values 163 | 164 | | Title | Explanation | Code | 165 | | ------------- |:-------------:| :-----:| 166 | | pd.DataFrame.agg: Aggregate over Columns or Rows Using Multiple Operations | [link](https://mathdatasimplified.com/2021/05/09/pd-dataframe-agg-aggregate-over-columns-or-rows-using-multiple-operations/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/pd_dataframe_agg.py) 167 | | pandas.DataFrame.agg: Apply Different Aggregations to Different Columns | [link](https://mathdatasimplified.com/2021/07/29/pandas-dataframe-agg-apply-different-aggregations-to-different-columns/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/pandas/df_agg.py) 168 | | DataFrame.pipe: Increase the Readability of your Code when Applying Multiple Functions to a DataFrame | [link](https://mathdatasimplified.com/2021/04/20/dataframe-pipe-increase-the-readability-of-your-code-when-applying-multiple-functions-to-a-dataframe/) |[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/dataframe_pipe.py) 169 | | pd.Series.map: Change Values of a Pandas Series Using a Dictionary | [link](https://mathdatasimplified.com/2021/05/21/pd-series-map-change-values-of-a-pandas-series-using-a-dictionary/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/pandas/pd_Series_map.py) 170 | | pd.Series.str: Manipulate Text Data in a pandas Series | [link](https://mathdatasimplified.com/2021/04/03/pd-series-str-manipulate-text-data-in-a-pandas-series/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/pd_series_str.py) 171 | | set_categories in pandas: Sort Categorical Column by a Specific Ordering | 
[link](https://mathdatasimplified.com/2021/02/09/set_categories-in-pandas-how-to-sort-categorical-column-by-a-specific-ordering/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/set_categories.py) 172 | | parse_dates: Convert Columns into Datetime When Using Pandas to Read CSV Files | [link](https://mathdatasimplified.com/2021/01/02/parse_dates-convert-columns-into-datetime-when-using-pandas-to-read-csv-files/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/parse_dates.py) 173 | | Filter Rows only if Column Contains Values from another List | [link](https://mathdatasimplified.com/2020/12/19/filter-rows-only-if-column-contains-values-from-another-list/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/s.is_in.py) 174 | | Specify suffixes when using df.merge() | [link](https://mathdatasimplified.com/2020/12/01/specify-suffixes-when-using-df-merge/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/df_merge.py) 175 | | Specify the datatype to speed up your code and reduce memory | [link](https://mathdatasimplified.com/2020/11/23/specify-the-datatype-to-speed-up-your-code-and-reduce-memory/) 176 | | Highlight your pandas DataFrame | [link](https://mathdatasimplified.com/2020/11/23/highlight-your-pandas-dataframe/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/highlight_pandas.ipynb) 177 | | Assign Values to Multiple New Columns | [link](https://mathdatasimplified.com/2020/11/23/assign-values-to-multiple-new-columns/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/df_assign.py) 178 | | Reduce pd.DataFrame’s Memory | [link](https://mathdatasimplified.com/2020/11/23/reduce-pd-dataframes-memory/) | 
[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/reduce_memory.py) 179 | | pd.DataFrame.explode: Transform Each Element in an Iterable to a Row | [link](https://mathdatasimplified.com/2021/07/08/pd-dataframe-explode-transform-each-element-in-an-iterable-to-a-row/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/pandas/df_explode.py) 180 | | pandas.cut: Bin a DataFrame’s values into Discrete Intervals | [link](https://mathdatasimplified.com/2021/07/13/pandas-cut-bin-a-dataframes-values-into-discrete-intervals/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/pandas/pd_cut.py) 181 | | Forward Fill in Pandas: Use the Previous Value to Fill the Current Missing Value | [link](https://mathdatasimplified.com/2021/08/20/forward-fill-use-the-previous/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/pandas/df_fillna.py) 182 | | pandas.pivot_table: Turn Your DataFrame Into a Pivot Table | [link](https://mathdatasimplified.com/2021/08/24/pandas-pivot_table-turn-your-dataframe-into-a-pivot-table/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/pandas/df_pivot.py) 183 | 184 | ### Get Values 185 | 186 | | Title | Explanation | Code | 187 | | ------------- |:-------------:| :-----:| 188 | | df.columns.str.startswith: Find DataFrame’s Columns that Start with a Pattern | [link](https://mathdatasimplified.com/2021/05/27/df-columns-str-startswith-find-dataframes-columns-that-start-with-a-pattern/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/df_columns_str_start_with.py) 189 | | pandas.DataFrame.iterrows: Iterate over Rows of a DataFrame | [link](https://mathdatasimplified.com/2021/06/15/pandas-dataframe-iterrows-iterate-over-rows-of-a-dataframe/) | 
[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/pd_dataframe_iterrows.py) 190 | | pandas.Series.dt: Access Datetime Properties of pandas Series | [link](https://mathdatasimplified.com/2021/05/13/pandas-series-dt-access-datetime-properties-of-pandas-series/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/pd_series_dt.py) 191 | | pd.Series.between: Select Rows in a pandas Series Containing Values between 2 Numbers | [link](https://mathdatasimplified.com/2021/03/03/pd-series-between-obtain-the-rows-with-values-lie-between-2-numbers/) |[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/pd_series_between.py) 192 | | DataFrame rolling: Find the average of the previous n datapoints using Pandas | [link](https://mathdatasimplified.com/2021/01/31/dataframe-rolling-find-the-average-of-the-previous-n-datapoints-using-pandas/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/df_rolling.py) 193 | | select_dtypes: Return a subset of a DataFrame including/excluding columns based on their dtype | [link](https://mathdatasimplified.com/2021/01/26/select_dtypes-return-a-subset-of-a-dataframe-including-excluding-columns-based-on-their-dtype/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/select_dtypes.py) 194 | | pct_change: Find the percentage change between the current and a prior element in a pandas Series | [link](https://mathdatasimplified.com/2021/01/19/pct_change-find-the-percentage-change-between-the-current-and-a-prior-element-in-a-pandas-series/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/pct_change.py) 195 | | DataFrame.diff and DataFrame.shift: Take the Difference between Rows within a Column in Pandas | 
[link](https://mathdatasimplified.com/2021/01/07/dataframe-diff-and-dataframe-shift-take-the-difference-between-rows-within-a-column-in-pandas/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/df_diff.py) 196 | | Pandas DataFrame: How to select all columns that start with a word | [link](https://mathdatasimplified.com/2020/11/27/pandas-dataframe-how-to-select-all-columns-that-start-with-a-word/) |[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/select_columns_start_with.py) 197 | | Exclude Outliers | [link](https://mathdatasimplified.com/2020/11/23/exclude-outliers/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/exclude_outliers.py) 198 | | Pandas DataFrame Get Data in a Year Range | [link](https://mathdatasimplified.com/2020/11/23/pandas-dataframe-get-data-in-a-year-range/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/get_data_in_year_range.py) 199 | | pd.reindex: Replace the Values of the Missing Dates with 0 | [link](https://mathdatasimplified.com/2021/07/20/pd-reindex-replace-the-values-of-the-missing-dates-with-0/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/pandas/pd_reindex.py) 200 | | Select DataFrame Rows Before or After a Specific Date | [link](https://mathdatasimplified.com/2021/07/23/select-dataframe-rows-before-or-after-a-specific-date/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/pandas/df_datetime_comparison.py) 201 | | DataFrame.groupby.sample: Get a Random Sample of Items from Each Category in a Column | [link](https://mathdatasimplified.com/2021/08/10/dataframe-groupby-sample-get-a-random-sample-of-items-from-each-category-in-a-column/) | 
[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/pandas/pd_groupby_sample.py) 202 | 203 | ### Testing 204 | 205 | | Title | Explanation | Code | 206 | | ------------- |:-------------:| :-----:| 207 | | assert_frame_equal: Test whether Two DataFrames are Similar | [link](https://mathdatasimplified.com/2021/04/15/assert_frame-equal-test-whether-two-dataframes-are-similar/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/pandas/assert_frame_equal.py) 208 | 209 | 210 | 211 | 212 |

Numpy

213 | 214 | | Title | Explanation | Code | 215 | | ------------- |:-------------:| :-----:| 216 | | np.ravel: Flatten a Numpy Array | [link](https://mathdatasimplified.com/2021/05/18/np-ravel-flatten-a-numpy-array/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/numpy/np_ravel.py) 217 | | Use List to Change the Positions of Rows or Columns in a Numpy Array | [link](https://mathdatasimplified.com/2021/05/07/use-list-to-change-the-positions-of-rows-or-columns-in-a-numpy-array/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/numpy/use_list_to_change_position_of_arrays.py) 218 | | Key Parameter in Max(): Find the Key with the Largest Value | [link](https://mathdatasimplified.com/2021/02/19/key-parameter-in-max-find-the-key-with-the-largest-value/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/python/key_in_max.py) 219 | | Difference between Numpy’s All and Any Methods | [link](https://mathdatasimplified.com/2021/03/31/difference-between-numpys-all-and-any-methods/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/numpy/any_all.py) 220 | | Double np.argsort: Get Rank of Values in an Array | [link](https://mathdatasimplified.com/2021/01/03/double-np-argsort-get-rank-of-values-in-an-array/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/numpy/double_np_argsort.py) 221 | | Get the index of the max value in a Numpy array | [link](https://mathdatasimplified.com/2020/12/15/get-the-index-of-the-max-value-in-a-numpy-array/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/numpy/np_argmax.py) 222 | | np.all: Test Whether All Elements along a Given Axis of a NumPy Array Evaluate to True | 
[link](https://mathdatasimplified.com/2021/06/22/np-all-test-whether-all-elements-along-a-given-axis-of-a-numpy-array-evaluate-to-true/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/numpy/np_array_all.py) 223 | | np.where: Replace Elements of a NumPy Array Based on a Condition | [link](https://mathdatasimplified.com/2021/03/20/np-where-transform-values-of-a-numpy-array-using-conditions/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/numpy/np_where.py) 224 | | array-to-latex: Turn a Numpy Array into Latex | [link](https://mathdatasimplified.com/2021/06/23/array_to_latex-turn-a-numpy-array-into-latex/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/numpy/array_to_latex_example.ipynb) 225 | | Numpy Comparison Operators | [link](https://mathdatasimplified.com/2021/07/15/numpy-comparison-operators/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/numpy/np_comparison.py) 226 | | NumPy.linspace: Get Evenly Spaced Numbers Over a Specific Interval | [link](https://mathdatasimplified.com/2021/08/05/numpy-linspace-get-evenly-spaced-numbers-over-a-specific-interval/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/numpy/np_linspace.py) 227 | | NumPy.testing.assert_almost_equal: Check If Two Arrays Are Equal up to a Certain Precision | [link](https://mathdatasimplified.com/2021/08/12/numpy-testing-assert_almost_equal-check-if-two-arrays-are-equal-up-to-a-certain-precision/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/numpy/numpy_testing_almost_equal.py) 228 | 229 | 230 |

Data Science Tools

231 | 232 | ### Testing 233 | 234 | | Title | Explanation | Code | 235 | | ------------- |:-------------:| :-----:| 236 | | snoop : Smart Print to Debug your Python Function | [link](https://mathdatasimplified.com/2021/05/28/snoop-smart-print-to-debug-your-python-function/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/snoop_example.py) 237 | | pytest benchmark: A Pytest Fixture to Benchmark your Code | [link](https://mathdatasimplified.com/2021/05/19/pytest-benchmark-a-pytest-fixture-to-benchmark-your-code/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/pytest_benchmark.py) 238 | | pytest.mark.parametrize: Test your Functions with Multiple Inputs | [link](https://mathdatasimplified.com/2021/06/09/pytest-mark-parametrize-test-your-functions-with-multiple-inputs/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/pytest_parametrize.py) 239 | | Pytest: Shows only Failed Tests | [link](https://mathdatasimplified.com/2021/01/27/pytest-shows-only-failed-tests/) 240 | | Pytest Fixtures: Use the same data for different tests | [link](https://mathdatasimplified.com/2020/12/05/pytest-fixtures-use-the-same-data-for-different-tests/)| [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/pytest_fixture.py) 241 | |Pytest repeat | [link](https://mathdatasimplified.com/2020/11/23/pytest-repeat/)|[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/pytest_repeat.py) 242 | | Pandera: a Python Library to Validate Your Pandas DataFrame | [link](https://mathdatasimplified.com/2021/01/17/pandera-a-python-library-to-validate-your-pandas-dataframe/) | 
[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/pandera_example.py) 243 | 244 | ### Data 245 | 246 | | Title | Explanation | Code | 247 | | ------------- |:-------------:| :-----:| 248 | | faker: Create Fake Data in One Line of Code |[link](https://mathdatasimplified.com/2021/05/14/faker-create-fake-data-in-one-line-of-code/)|[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/faker_example.py) 249 | | DVC: A Data Version Control Tool for your Data Science Projects | [link](https://mathdatasimplified.com/2021/05/06/dvc-a-data-version-control-tool-for-your-data-science-projects/)| [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/dvc_example.sh) 250 | | fetch_openml: Get OpenML’s Dataset in One Line of Code | [link](https://mathdatasimplified.com/2021/04/23/fetch_openml-get-openmls-dataset-in-one-line-of-code/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/fetch_openml_example.py) 251 | | github-to-sqlite: Download the Data of your Starred GitHub Repositories in One Command Line | [link](https://mathdatasimplified.com/2021/03/30/github-to-sqlite-download-the-data-of-your-starred-github-repositories-in-one-command-line/) 252 | | Autoscraper | [link](https://mathdatasimplified.com/2020/11/23/autoscraper/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/autoscraper_example.py) 253 | | Extract series data from various Internet sources directly into a pandas DataFrame | [link](https://mathdatasimplified.com/2020/11/23/extract-series-data-from-various-internet-sources-directly-into-a-pandas-dataframe/) | 
[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/extract_various_data.py) 254 | | Compare the similar features between 2 different datasets | [link](https://mathdatasimplified.com/2020/11/23/compare-the-similar-features-between-2-different-datasets/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/compare_2_datasets) 255 | | newspaper3k: Extract Meaningful Information From an Article in 2 Lines of Code | [link](https://mathdatasimplified.com/2021/03/23/newspaper3k-extract-meaningful-information-from-an-articles-in-2-lines-of-code/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/data_science_tools/newspaper3k.ipynb) 256 | | distfit: Find The Best Theoretical Distribution For Your Data in Python | [link](https://mathdatasimplified.com/2021/09/08/distfit-find-the-best-theoretical-distribution-for-your-data-in-python/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/data_science_tools/distfit_example.py) 257 | 258 | ### Feature extraction 259 | 260 | | Title | Explanation | Code | 261 | | ------------- |:-------------:| :-----:| 262 | | datefinder: Automatically Find Dates and Time in a Python String | [link](https://mathdatasimplified.com/2021/05/08/datefinder-automatically-find-dates-and-time-in-a-python-string/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/datefinder_example.py) 263 | | dill’s getname: Get the Name of a Python Object | [link](https://mathdatasimplified.com/2021/04/29/dills-getname-get-names-a-python-object/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/getname_example.py) 264 | | pytrend: Get the Trend of a Keyword on Google Search Over Time | 
[link](https://mathdatasimplified.com/2021/04/12/pytrend-get-the-trend-of-a-keyword-on-google-search-over-time/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/pytrends_example.ipynb) 265 | | add_datepart: Add Relevant DateTime Features in One Line of Code | [link](https://mathdatasimplified.com/2021/02/11/add_datepart-add-relevant-datetime-features-in-one-line-of-code/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/add_datepart_example.py) 266 | | Geopy: Extract Location Based on Python String | [link](https://mathdatasimplified.com/2020/12/08/geopy-extract-location-based-on-python-string/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/geopy_example.py) 267 | | Maya: Convert the string to datetime automatically | [link](https://mathdatasimplified.com/2020/11/23/maya-convert-the-string-to-datetime-automatically/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/maya_example.py) 268 | | Select the features by their relevance | [link](https://mathdatasimplified.com/2020/11/23/select-the-features-by-their-relevance/) 269 | | Extract holiday from date column | [link](https://mathdatasimplified.com/2020/11/23/extract-holiday-from-date-column/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/extract_holidays.py) 270 | | fastai’s cont_cat_split: Get a DataFrame’s Continuous and Categorical Variables Based on Their Cardinality | [link](https://mathdatasimplified.com/2021/07/16/fastais-cont_cat_split-get-a-dataframes-continuous-and-categorical-variables-based-on-their-cardinality/) | 
[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/data_science_tools/fastai_cont_cat_split.py) 271 | 272 | ### Visualization 273 | 274 | | Title | Explanation | Code | 275 | | ------------- |:-------------:| :-----:| 276 | | D-Tale: A Python Library to Visualize and Analyze your Data Without Code | [link](https://mathdatasimplified.com/2021/05/16/d-tale-a-python-library-to-visualize-and-analyze-your-data-without-code/) 277 | | Graphviz: Create a Flowchart to Capture your Ideas in Python | [link](https://mathdatasimplified.com/2021/02/06/graphviz-create-a-flowchart-to-capture-your-ideas-in-python/) |[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/terminal/environment_variables) 278 | | Create an interactive map in Python | [link](https://mathdatasimplified.com/2020/12/03/create-an-interactive-map-in-python/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/folium_example.ipynb) 279 | | dtreeviz: Visualize and Interpret a Decision Tree Model | [link](https://mathdatasimplified.com/2021/09/01/dtreeviz-visualize-and-interpret-a-decision-tree-model/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/data_science_tools/dtreeviz_example.py) 280 | 281 | ### Sharing and Downloading 282 | 283 | | Title | Explanation | Code | 284 | | ------------- |:-------------:| :-----:| 285 | | Datapane: Publish your Python Objects on the Web in 2 Lines of Code | [link](https://mathdatasimplified.com/2021/04/25/datapane-publish-your-python-objects-on-the-web-in-2-lines-of-code/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/datapane_example.py) 286 | | gdown: Download a File from Google Drive in Python | 
[link](https://mathdatasimplified.com/2021/01/04/gdown-download-a-file-from-google-drive-in-python/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/gdown_example.py) 287 | 288 | ### Natural Language Processing 289 | 290 | | Title | Explanation | Code | 291 | | ------------- |:-------------:| :-----:| 292 | | TextBlob: Processing Text in One Line of Code | [link](https://mathdatasimplified.com/2021/04/16/textblob-processing-text-in-one-line-of-code/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/textblob_example.py) 293 | | sumy: Summarize Text in One Line of Code | [link](https://mathdatasimplified.com/2021/03/10/sumy-summarize-text-in-one-line-of-code/) 294 | | Spacy_streamlit: Create a Web App to Visualize your Text in 3 Lines of Code | [link](https://mathdatasimplified.com/2020/12/29/spacy_streamlit-create-a-web-app-to-visualize-your-text-in-3-lines-of-code/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/streamlit_app.py) 295 | | Extract a contiguous sequence of 2 words | [link](https://mathdatasimplified.com/2020/11/23/extract-a-contiguous-sequence-of-2-words/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/spacy_ngram.py) 296 | | Detect the “almost similar” articles | [link](https://mathdatasimplified.com/2020/11/23/detect-the-almost-similar-articles/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/difflib_example.py) 297 | | Convert number to words | [link](https://mathdatasimplified.com/2020/11/23/convert-number-to-words/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/convert_number_to_words.py) 298 | | texthero.clean: 
Preprocess Text in One Line of Code | [link](https://mathdatasimplified.com/2021/07/30/texthero-clean-preprocess-text-in-one-line-of-code/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/data_science_tools/texthero_examples.py) 299 | | wordfreq: Estimate the Frequency of a Word in 36 Languages | [link](https://mathdatasimplified.com/2021/09/10/wordfreq-estimate-the-frequency-of-a-word-in-36-languages/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/data_science_tools/wordfreq_example.py) 300 | 301 | ### Tools for Best Python Practices 302 | 303 | | Title | Explanation | Code | 304 | | ------------- |:-------------:| :-----:| 305 | | Don’t Hard-Code. Use Hydra Instead | [link](https://mathdatasimplified.com/2021/04/08/dont-hard-code-use-hydra-instead/) |[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/hydra_example) 306 | | python-dotenv: How to Load the Secret Information from .env File | [link](https://mathdatasimplified.com/2021/02/20/python-dotenv-how-to-load-the-secret-information-from-env-file/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/dotenv_example) 307 | | kedro pipeline: Create Pipeline for your Data Science Projects in Python | [link](https://mathdatasimplified.com/2021/02/03/kedro-pipeline-create-pipeline-for-your-data-science-projects-in-python/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/kedro_example.py) 308 | | docopt: Create Beautiful Command-line Interfaces for Documentation in Python | [link](https://mathdatasimplified.com/2021/03/18/docopt-create-beautiful-command-line-interfaces-for-documentation-in-python/)| 
[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/docopt_example.py) 309 | 310 | ### Speed Up Code 311 | 312 | | Title | Explanation | Code | 313 | | ------------- |:-------------:| :-----:| 314 | | fastai’s df_shrink: Shrink DataFrame’s Memory Usage in One Line of Code | [link](https://mathdatasimplified.com/2021/02/24/fastais-df_shrink-shrink-dataframes-memory-usage-in-one-line-of-code/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/fastai_df_shrink.py) 315 | | Swifter: Add One Word to Make your Pandas Apply 23 Times Faster | [link](https://mathdatasimplified.com/2021/01/13/swifter-add-one-word-to-make-your-pandas-apply-23-times-faster/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/swifter_example.py) 316 | 317 | ### Better Pandas 318 | 319 | | Title | Explanation | Code | 320 | | ------------- |:-------------:| :-----:| 321 | | rich-dataframe: Create Animated and Colorful Pandas Dataframe | [link](https://mathdatasimplified.com/2021/02/17/rich-dataframe-create-animated-and-colorful-pandas-dataframe/) 322 | | tqdm: Add Progress Bar to your Pandas Apply | [link](https://mathdatasimplified.com/2020/12/30/tqdm-add-progress-bar-to-your-pandas-apply/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/tqdm_example.py) 323 | | tqdm.set_description: Set a Description for Your Progress Bar | [link](https://mathdatasimplified.com/2021/08/18/tqdm-set_description-set-a-description-for-your-progress-bar/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/cool_tools/tqdm_set_description.py) 324 | 325 | ### Machine Learning 326 | 327 | | Title | Explanation | Code | 328 | | ------------- |:-------------:| :-----:| 329 | | causalimpact: Find 
Causal Relation of an Event and a Variable in Python | [link](https://mathdatasimplified.com/2021/01/25/causalimpact-find-causal-relation-of-an-event-and-a-variable-in-python/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/causalimpact_example.ipynb) 330 | | Pipeline + GridSearchCV: Prevent Data Leakage when Scaling the Data | [link](https://mathdatasimplified.com/2020/12/27/pipeline-gridsearchcv-prevent-data-leakage-when-scaling-the-data/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/pipeline_gridsearchcv.py) 331 | | Decompose high dimensional data into two or three dimensions | [link](https://mathdatasimplified.com/2020/11/23/decompose-high-dimensional-data-into-two-or-three-dimensions/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/data_science_tools/decompose_high_dementional_data.ipynb) 332 | | Cross Validation with Time Series | [link](https://mathdatasimplified.com/2020/11/23/cross-validation-with-time-series/) 333 | | squared=False: Get RMSE from Sklearn’s mean_squared_error method | [link](https://mathdatasimplified.com/2021/08/13/squaredfalse-get-rmse-from-sklearns-mean_squared_error-method/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/data_science_tools/sklearn_rmse.py) 334 | 335 |

Terminal

336 | 337 | ### Text 338 | 339 | | Title | Explanation | Code | 340 | | ------------- |:-------------:| :-----:| 341 | | tr Command: Translate Characters to Improve Readability In Unix/Linux | [link](https://mathdatasimplified.com/2021/04/05/tr-command-translate-characters-to-improve-readability-in-unix-linux/) |[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/terminal/tr_command.sh) 342 | | Sed Command: Replace a string with another string on the command line | [link](https://mathdatasimplified.com/2020/12/17/sed-command-replace-a-string-with-another-string-on-the-command-line/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/terminal/sed_command) 343 | 344 | ### Files 345 | 346 | | Title | Explanation | Code | 347 | | ------------- |:-------------:| :-----:| 348 | | fd: a Simple Tool to Search for Files or Directories Fast | [link](https://mathdatasimplified.com/2021/04/09/fd-a-simple-tool-to-search-for-files-or-directories-fast/) 349 | | ln -s: Create Symbolic Link Between 2 Files | [link](https://mathdatasimplified.com/2021/04/11/ln-s-create-symbolic-link-between-2-files/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/terminal/create_symbolic_link.sh) 350 | | tee: Save Command Output to a File | [link](https://mathdatasimplified.com/2021/03/06/tee-save-command-output-to-a-file/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/terminal/tee_example.sh) 351 | | Make Important Files Impossible to be Deleted | [link](https://mathdatasimplified.com/2021/01/15/make-important-files-impossible-to-be-deleted/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/terminal/make_file_immutable.sh) 352 | | View tree structure of your file | 
[link](https://mathdatasimplified.com/2020/11/23/view-tree-structure-of-your-file/) 353 | 354 | ### Tracking 355 | 356 | | Title | Explanation | Code | 357 | | ------------- |:-------------:| :-----:| 358 | | timeit on the Command Line: Measure Execution Time of Small Code Snippets | [link](https://mathdatasimplified.com/2021/05/25/timeit-on-the-command-line-measure-execution-time-of-small-code-snippets/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/terminal/timeit_command_line.sh) 359 | | Time Command: Track the Time it Takes to Execute a File in Linux | [link](https://mathdatasimplified.com/2021/01/24/time-command-track-the-time-it-takes-to-execute-a-file-in-linux/) 360 | | htop | [link](https://mathdatasimplified.com/2020/11/23/htop/) 361 | 362 | ### Python 363 | 364 | | Title | Explanation | Code | 365 | | ------------- |:-------------:| :-----:| 366 | | Python Shell as a Calculator: Grab the Last Output Using “_” | [link](https://mathdatasimplified.com/2021/03/16/python-shell-as-an-calculator-grab-the-last-output-using-_/) 367 | | Find version of a Python library using pip list and grep | [link](https://mathdatasimplified.com/2020/12/04/find-version-of-a-python-library-using-pip-list-and-grep/) 368 | | Conda rollback to the last revision | [link](https://mathdatasimplified.com/2020/11/23/conda-rollback-to-the-last-revision/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/terminal/conda_rollback.sh) 369 | | How to Check Whether a Library is Installed | [link](https://mathdatasimplified.com/2020/11/23/1006/) |[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/terminal/check_if_library_is_installed.sh) 370 | | pydash.chunk: Split Elements in a List into Groups of n Items | [link](https://mathdatasimplified.com/2021/08/11/pydash-chunk-split-elements-in-a-list-into-groups-of-n-items/) | 
[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/cool_tools/pydash_chunk.py) 371 | ### Prettify Terminal 372 | 373 | | Title | Explanation | Code | 374 | | ------------- |:-------------:| :-----:| 375 | | colorls: Beautify your ls Command with Color and Icons | [link](https://mathdatasimplified.com/2021/04/18/colorls-beautify-your-ls-command-with-color-and-icons/) 376 | | Colorama: Produce a colored terminal text in Python | [link](https://mathdatasimplified.com/2020/12/13/colorama-produce-a-colored-terminal-text-in-python/) 377 | 378 | ### Sharing 379 | 380 | | Title | Explanation | Code | 381 | | ------------- |:-------------:| :-----:| 382 | | terminalizer: Record and Share your Terminal Sessions | [link](https://mathdatasimplified.com/2021/03/08/termanalizer-record-and-share-your-terminal-sessions/) 383 | 384 | ### Productive Hacks 385 | 386 | | Title | Explanation | Code | 387 | | ------------- |:-------------:| :-----:| 388 | | Bash For Loop: Stop Staring at your Screen. 
Write a Bash For Loop instead | [link](https://mathdatasimplified.com/2021/03/29/bash-for-loop-stop-staring-at-your-screen-write-a-bash-for-loop-instead/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/terminal/bash_for_loop.sh) 389 | | Environment Variables: Save Private Information in your Local Machine | [link](https://mathdatasimplified.com/2021/02/07/environment-variables-save-private-information-in-your-local-machine/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/terminal/environment_variables) 390 | | Pet: A Command-line Snippet Tool That Allows you to Store your Favorite Commands | [link](https://mathdatasimplified.com/2021/01/17/pet-a-command-line-snippet-tool-that-allows-you-to-store-your-favorite-commands/) 391 | | Loop through a list of data on your terminal | [link](https://mathdatasimplified.com/2020/11/23/loop-through-a-list-of-data-on-your-terminal/) 392 | | Multi-run command | [link](https://mathdatasimplified.com/2020/11/23/multi-run-command/) 393 | | Run multiple commands in one line of code | [link](https://mathdatasimplified.com/2020/11/23/run-multiple-commands-in-one-line-of-code/) 394 | 395 |

Cool Tools

396 | 397 | ### Better Output 398 | 399 | | Title | Explanation | Code | 400 | | ------------- |:-------------:| :-----:| 401 | | How to Strip Outputs and Execute Interactive Code in a Python Script | [link](https://mathdatasimplified.com/2021/05/12/how-to-strip-outputs-and-execute-interactive-code-in-a-python-script/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/cool_tools/strip_interactive_example.py) 402 | | rich.inspect: Produce a Beautiful Report on any Python Object | [link](https://mathdatasimplified.com/2021/04/28/rich-inspect-produce-a-beautiful-report-on-any-python-object/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/cool_tools/rich_inspect.py) 403 | | Rich’s Console: Debug your Python Function in One Line of Code | [link](https://mathdatasimplified.com/2021/02/12/richs-console-debug-your-python-function-in-one-line-of-code/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/cool_tools/rich_console.py) 404 | | loguru: Print Readable Traceback in Python | [link](https://mathdatasimplified.com/2021/01/23/loguru-print-readable-traceback-in-python/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/cool_tools/loguru_example.py) 405 | | Icecream: Adding a Datetime Stamp to Python print | [link](https://mathdatasimplified.com/2021/01/15/icecream-adding-a-datetime-stamp-to-python-print/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/cool_tools/icecream_datetime.py) 406 | | Icecream: Never use print() to debug again | [link](https://mathdatasimplified.com/2021/01/01/icrecream-never-use-print-to-debug-again/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/cool_tools/icecream_example.py) 407 | | Pyfiglet: Make large and unique letters out 
of ordinary text in Python | [link](https://mathdatasimplified.com/2020/12/22/pyfiglet-make-large-and-unique-letters-out-of-ordinary-text-in-python/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/cool_tools/pyfiglet_example.py) 408 | | heartrate — Visualize the Execution of a Python Program in Real-Time | [link](https://mathdatasimplified.com/2021/06/25/heartrate-visualize-the-execution-of-a-python-program-in-real-time/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/cool_tools/heart_rate.py) 409 | 410 | ### Tracking 411 | 412 | | Title | Explanation | Code | 413 | | ------------- |:-------------:| :-----:| 414 | | Stacer: Visualize the History of your CPU and Memory Usage | [link](https://mathdatasimplified.com/2021/05/02/stacer-visualize-the-history-of-your-cpu-and-memory-usage/) 415 | 416 | ### Data 417 | 418 | | Title | Explanation | Code | 419 | | ------------- |:-------------:| :-----:| 420 | | sherlock: Search for a Username Across 298 Popular Websites | [link](https://mathdatasimplified.com/2021/03/09/sherlock-search-for-a-username-across-298-popular-websites/) 421 | | getme forecast: Get the Weather Forecast Through your Terminal | [link](https://mathdatasimplified.com/2021/01/10/getme-forecast-get-the-weather-forecast-through-your-terminal/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/cool_tools/getme_forecast.sh) 422 | 423 | ### Automation 424 | 425 | | Title | Explanation | Code | 426 | | ------------- |:-------------:| :-----:| 427 | | notion-py: Access and Edit your Notion App Using Python | [link](https://mathdatasimplified.com/2021/04/01/notion-py-access-and-edit-your-notion-app-using-python/)| [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/cool_tools/notion_example.py) 428 | | organize: Automate Organizing Files with Command Line | 
[link](https://mathdatasimplified.com/2021/03/14/organize-automate-organizing-files-with-command-line/) 429 | | Schedule: Schedule your Python Functions to Run At a Specific Time | [link](https://mathdatasimplified.com/2021/01/30/schedule-schedule-your-python-functions-to-run-at-a-specific-time/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/cool_tools/schedule_example.py) 430 | | notify-send: Send a Desktop Notification after Finishing Executing a File | [link](https://mathdatasimplified.com/2021/01/20/notify-send-send-a-desktop-notification-after-finishing-executing-a-file/) |[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/cool_tools/notify_send.sh) 431 | | isort: Automatically Sort your Python Imports in 1 Line of Code | [link](https://mathdatasimplified.com/2021/01/06/isort-automatically-sort-your-python-imports-in-1-line-of-code/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/cool_tools/isort_example.py) 432 | | knockknock: Receive an email when your code finishes executing | [link](https://mathdatasimplified.com/2020/11/23/knockknock-receive-an-email-when-your-code-finishes-executing/)| [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/cool_tools/knockknock_example.py) 433 | | snscrape: Scrape Social Networking Services in Python | [link](https://mathdatasimplified.com/2021/06/30/snsscrape-scrape-social-networking-services-in-python/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/cool_tools/snsscrape_example.sh) 434 | | Typer: Build a Command-Line Interface in a Few Lines of Code | [link](https://mathdatasimplified.com/2021/07/14/typer-build-a-command-line-interface-in-a-few-lines-of-code/) | 
[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/cool_tools/box_example.py) 435 | | yarl: Create and Extract Elements from a URL Using Python | [link](https://mathdatasimplified.com/2021/07/21/yarl-create-and-extract-elements-from-a-url-using-python/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/cool_tools/yarl_example.py) 436 | | interrogate: Check your Python Code for Missing Docstrings | [link](https://mathdatasimplified.com/2021/08/06/interrogate-check-your-python-code-for-missing-docstrings/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/cool_tools/interrogate_example.py) 437 | | mypy: Static Type Checker for Python | [link](https://mathdatasimplified.com/2021/08/23/mypy-static-type-checker-for-python/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/cool_tools/mypy_example.py) 438 | 439 | ### Git and GitHub 440 | 441 | | Title | Explanation | Code | 442 | | ------------- |:-------------:| :-----:| 443 | | Github CLI: Brings GitHub to your Terminal | [link](https://mathdatasimplified.com/2021/02/21/github-cli-brings-github-to-your-terminal/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/terminal/gh_cli.sh) 444 | | Pull one file from another branch using git | [link](https://mathdatasimplified.com/2020/11/23/pull-one-file-from-another-branch-using-git/) 445 | | Download a file on Github using wget | [link](https://mathdatasimplified.com/2020/11/23/download-a-file-on-github-using-wget/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/terminal/download_github_file.sh) 446 | | github1s: Read GitHub Code with VS Code on your Browser in One Second | 
[link](https://mathdatasimplified.com/2021/02/15/github1s-read-github-code-with-vs-code-on-your-browser-in-one-second/) 447 | | PyGithub: Manage your Github resources using Python | [link](https://mathdatasimplified.com/2020/12/24/pygithub-manage-your-github-resources-using-python/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/cool_tools/pygithub_example.py) 448 | | Astral: Organize your Github stars with ease | [link](https://mathdatasimplified.com/2020/12/18/astral-organize-your-github-stars-with-ease/) 449 | | pip install -e: Install Forked GitHub Repository using Pip | [link](https://mathdatasimplified.com/2021/07/28/pip-install-e-install-forked-github-repository-using-pip/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/cool_tools/install_forked.sh) 450 | 451 | ### Alternative Approach 452 | 453 | | Title | Explanation | Code | 454 | | ------------- |:-------------:| :-----:| 455 | | Box: Using Dot Notation to Access Keys in a Python Dictionary | [link](https://mathdatasimplified.com/2021/03/02/box-using-dot-notation-to-access-keys-in-a-python-dictionary/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/cool_tools/box_example.py) 456 | | decorator module: Write Shorter Python Decorators without Nested Functions | [link](https://mathdatasimplified.com/2021/03/27/decorator-module-write-shorter-python-decorators-without-nested-functions/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/cool_tools/decorator_module.py) 457 | | virtualenv-clone: Create a Copy of a Virtual Environment | [link](https://mathdatasimplified.com/2021/02/01/virtualenv-clone-create-a-copy-of-a-virtual-environment/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/terminal/virtualenv_clone.sh) 458 | 459 |

Jupyter Notebook

460 | 461 | | Title | Explanation | Code | 462 | | ------------- |:-------------:| :-----:| 463 | | nbdime: Better Version Control for Jupyter Notebook | [link](https://mathdatasimplified.com/2021/06/04/nbdime-better-version-control-for-jupyter-notebook/) 464 | | display in IPython: Display math equations in Jupyter Notebook | [link](https://mathdatasimplified.com/2021/03/01/display-in-ipython-display-math-equations-in-jupyter-notebook/) |[link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/notebook/display_math_equations.ipynb) 465 | | Reuse the notebook to run the same code across different data | [link](https://mathdatasimplified.com/2020/11/23/reuse-the-notebook-to-run-the-same-code-across-different-data/) 466 | | ngrok: Create a Public Server for your Jupyter Notebook in 1 Line of Code | [link](https://mathdatasimplified.com/2021/05/26/ngrok-create-a-public-server-for-your-jupyter-notebook-in-1-line-of-code/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/tree/master/code_snippets/cool_tools/ngrok_example.sh) 467 | | watermark: Get Information About Your Hardware and the Packages Being Used within Your Notebook | [link](https://mathdatasimplified.com/2021/07/07/watermark-get-information-about-your-hardware-and-the-packages-being-used-within-your-notebook/) | [link](https://github.com/khuyentran1401/Python-data-science-code-snippet/blob/master/code_snippets/notebook/watermark_example.ipynb) 468 | --------------------------------------------------------------------------------