├── README.md
└── demo_python_regex_extract_text.ipynb

/README.md:
--------------------------------------------------------------------------------
1 | # demo-python-regex-extract-text

--------------------------------------------------------------------------------
/demo_python_regex_extract_text.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "demo-python-regex-extract-text.ipynb",
  7 |       "provenance": [],
  8 |       "collapsed_sections": [],
  9 |       "include_colab_link": true
 10 |     },
 11 |     "kernelspec": {
 12 |       "name": "python3",
 13 |       "display_name": "Python 3"
 14 |     }
 15 |   },
 16 |   "cells": [
 17 |     {
 18 |       "cell_type": "markdown",
 19 |       "metadata": {
 20 |         "id": "view-in-github",
 21 |         "colab_type": "text"
 22 |       },
 23 |       "source": [
 24 |         "\"Open"
 25 |       ]
 26 |     },
 27 |     {
 28 |       "cell_type": "markdown",
 29 |       "metadata": {},
 30 |       "source": [
 31 |         "# Extract names and destinations from text with a regular expression\n",
 32 |         "\n",
 33 |         "Every matching line of the sample text is split into two captured fields by one regex. The pairs are collected in a list, loaded into a pandas DataFrame with the columns 姓名 and 去向, and written to `dest.xlsx`."
 34 |       ]
 35 |     },
 36 |     {
 37 |       "cell_type": "code",
 38 |       "metadata": {
 39 |         "id": "jEZbvf913EPz",
 40 |         "colab_type": "code",
 41 |         "colab": {}
 42 |       },
 43 |       "source": [
 44 |         "import re\n",
 45 |         "\n",
 46 |         "import pandas as pd"
 47 |       ],
 48 |       "execution_count": 0,
 49 |       "outputs": []
 50 |     },
 51 |     {
 52 |       "cell_type": "markdown",
 53 |       "metadata": {},
 54 |       "source": [
 55 |         "## Sample text"
 56 |       ]
 57 |     },
 58 |     {
 59 |       "cell_type": "code",
 60 |       "metadata": {
 61 |         "id": "PY4T9E5J3JTo",
 62 |         "colab_type": "code",
 63 |         "colab": {}
 64 |       },
 65 |       "source": [
 66 |         "data = \"\"\"张华考上了北京大学\n",
 67 |         "李萍进了中等技术学校\n",
 68 |         "韩梅梅进了百货公司\n",
 69 |         "他们都有光明的前途\"\"\""
 70 |       ],
 71 |       "execution_count": 0,
 72 |       "outputs": []
 73 |     },
 74 |     {
 75 |       "cell_type": "markdown",
 76 |       "metadata": {},
 77 |       "source": [
 78 |         "## Pattern\n",
 79 |         "\n",
 80 |         "`group(1)` captures the text before the 考/进 character and `group(2)` captures the text after 了."
 81 |       ]
 82 |     },
 83 |     {
 84 |       "cell_type": "code",
 85 |       "metadata": {
 86 |         "id": "fyYW18Cv3U6s",
 87 |         "colab_type": "code",
 88 |         "colab": {}
 89 |       },
 90 |       "source": [
 91 |         "# Inside [...] every character is a literal alternative, so the verbs are\n",
 92 |         "# listed bare; writing [考|进] would also accept a literal \"|\" as the verb.\n",
 93 |         "regex = r\"(.*)[考进].*了(.*)\""
 94 |       ],
 95 |       "execution_count": 0,
 96 |       "outputs": []
 97 |     },
 98 |     {
 99 |       "cell_type": "markdown",
100 |       "metadata": {},
101 |       "source": [
102 |         "## Extract the fields"
103 |       ]
104 |     },
105 |     {
106 |       "cell_type": "code",
107 |       "metadata": {
108 |         "id": "yvR4EkU-3Zrb",
109 |         "colab_type": "code",
110 |         "colab": {}
111 |       },
112 |       "source": [
113 |         "# The list is created in the same cell that fills it, so re-running this\n",
114 |         "# cell rebuilds the result instead of appending duplicate rows.\n",
115 |         "mylist = []\n",
116 |         "for line in data.split('\\n'):\n",
117 |         "    mysearch = re.search(regex, line)\n",
118 |         "    if mysearch:  # lines that do not fit the pattern are skipped\n",
119 |         "        name = mysearch.group(1)\n",
120 |         "        dest = mysearch.group(2)\n",
121 |         "        mylist.append((name, dest))\n"
122 |       ],
123 |       "execution_count": 0,
124 |       "outputs": []
125 |     },
126 |     {
127 |       "cell_type": "code",
128 |       "metadata": {
129 |         "id": "83a2xdRg3_jt",
130 |         "colab_type": "code",
131 |         "colab": {}
132 |       },
133 |       "source": [
134 |         "mylist"
135 |       ],
136 |       "execution_count": 0,
137 |       "outputs": []
138 |     },
139 |     {
140 |       "cell_type": "markdown",
141 |       "metadata": {},
142 |       "source": [
143 |         "## Tabulate and export"
144 |       ]
145 |     },
146 |     {
147 |       "cell_type": "code",
148 |       "metadata": {
149 |         "id": "gWfTZ2qp4CiP",
150 |         "colab_type": "code",
151 |         "colab": {}
152 |       },
153 |       "source": [
154 |         "# Naming the columns in the constructor also works when mylist is empty;\n",
155 |         "# assigning df.columns afterwards fails on a frame that has no columns.\n",
156 |         "df = pd.DataFrame(mylist, columns=['姓名', '去向'])"
157 |       ],
158 |       "execution_count": 0,
159 |       "outputs": []
160 |     },
161 |     {
162 |       "cell_type": "code",
163 |       "metadata": {
164 |         "id": "e9ti6-8m4FG-",
165 |         "colab_type": "code",
166 |         "colab": {}
167 |       },
168 |       "source": [
169 |         "df"
170 |       ],
171 |       "execution_count": 0,
172 |       "outputs": []
173 |     },
174 |     {
175 |       "cell_type": "code",
176 |       "metadata": {
177 |         "id": "Ae9puoPW4ODV",
178 |         "colab_type": "code",
179 |         "colab": {}
180 |       },
181 |       "source": [
182 |         "# index=False keeps the row index out of the workbook.\n",
183 |         "df.to_excel(\"dest.xlsx\", index=False)"
184 |       ],
185 |       "execution_count": 0,
186 |       "outputs": []
187 |     }
188 |   ]
189 | }

--------------------------------------------------------------------------------