├── weibo
│   ├── __init__.py
│   ├── spiders
│   │   ├── __init__.py
│   │   ├── image.py
│   │   └── video.py
│   ├── configs.example.py
│   ├── items.py
│   ├── api.py
│   ├── utils.py
│   ├── pipelines.py
│   ├── settings.py
│   └── middlewares.py
├── debug.py
├── scrapy.cfg
├── README.md
├── LICENSE
└── .gitignore

/weibo/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/debug.py:
--------------------------------------------------------------------------------
"""
This script is for debugging only.
"""
from scrapy.cmdline import execute

if __name__ == '__main__':
    cmd = 'scrapy crawl video'
    execute(cmd.split())

--------------------------------------------------------------------------------
/weibo/spiders/__init__.py:
--------------------------------------------------------------------------------
# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.

--------------------------------------------------------------------------------
/scrapy.cfg:
--------------------------------------------------------------------------------
# Automatically created by: scrapy startproject
#
# For more information about the [deploy] section see:
# https://scrapyd.readthedocs.io/en/latest/deploy.html

[settings]
default = weibo.settings

[deploy]
#url = http://localhost:6800/
project = weibo

--------------------------------------------------------------------------------
/weibo/configs.example.py:
--------------------------------------------------------------------------------
"""
This file contains weibo configs for the spider.
"""

STORE_PATH = './downloads'
CACHE_FILE = f'{STORE_PATH}/cache.pkl'

COOKIES = 'key=value; key=value; ...'

TARGETS = [
    'https://weibo.com/u/0000000000',
    'https://weibo.com/u/1111111111',
]

--------------------------------------------------------------------------------
/weibo/items.py:
--------------------------------------------------------------------------------
# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html

import scrapy


class WeiboItem(scrapy.Item):
    uuid = scrapy.Field()  # as the cache key
    filename = scrapy.Field()
    file_urls = scrapy.Field()
    files = scrapy.Field()

--------------------------------------------------------------------------------
/weibo/api.py:
--------------------------------------------------------------------------------
"""
This module contains weibo's api links.
"""


def info(uid: str) -> str:
    return f'https://weibo.com/ajax/profile/info?uid={uid}'


def get_image_wall(uid: str, since: str = '0') -> str:
    return f'https://weibo.com/ajax/profile/getImageWall?uid={uid}&sinceid={since}'


def get_water_fall(uid: str, cursor: str = '0') -> str:
    return f'https://weibo.com/ajax/profile/getWaterFallContent?uid={uid}&cursor={cursor}'


def large_image(pid: str, cdn: int = 1) -> str:
    return f'https://wx{cdn}.sinaimg.cn/large/{pid}'

--------------------------------------------------------------------------------
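A quick sketch of the URLs these helpers build, using a made-up uid and pid purely for illustration:

```python
from weibo import api

# Made-up identifiers, for illustration only.
uid, pid = '1234567890', '006x1aB2ly1abcdefg'

print(api.info(uid))                # https://weibo.com/ajax/profile/info?uid=1234567890
print(api.get_image_wall(uid))      # https://weibo.com/ajax/profile/getImageWall?uid=1234567890&sinceid=0
print(api.get_water_fall(uid))      # https://weibo.com/ajax/profile/getWaterFallContent?uid=1234567890&cursor=0
print(api.large_image(pid, cdn=2))  # https://wx2.sinaimg.cn/large/006x1aB2ly1abcdefg
```
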
/README.md:
--------------------------------------------------------------------------------
# Weibo Album Crawler

![python](https://img.shields.io/badge/python-3.10-blue)
![scrapy](https://img.shields.io/badge/scrapy-v2.5-blue)

A Sina Weibo crawler based on Scrapy; supports photo albums, videos, and more.

## Set up the environment

```shell
conda create -n weibo python=3.10
conda activate weibo
pip install scrapy
```

## Configure the crawler

* `weibo/settings.py`
  * number of concurrent requests: `CONCURRENT_REQUESTS`
  * video download directory: `FILES_STORE`

* `weibo/configs.py`
  * create the config file: `cp weibo/configs.example.py weibo/configs.py`
  * manually copy and paste the cookies of a logged-in session into `COOKIES`
  * target profile pages: `TARGETS`
  * download directory: `STORE_PATH`

## Run

```shell
scrapy crawl image
scrapy crawl video
```

--------------------------------------------------------------------------------
/weibo/utils.py:
--------------------------------------------------------------------------------
import logging
import os

from scrapy import logformatter


class LogFormatter(logformatter.LogFormatter):
    """
    Log DropItem at the DEBUG level because we intentionally drop a lot of items.
    """

    def dropped(self, item, exception, response, spider):
        formatter = super().dropped(item, exception, response, spider)
        formatter['level'] = logging.DEBUG
        return formatter


def prepare_folder(uid: str, uname: str, store_dir: str):
    """
    Migrate a target's folder (with the old uname, if any) to the new name.
    This is useful if a target has changed their screen name.
    """
    new_folder = f'{uid}_{uname}'

    os.makedirs(store_dir, exist_ok=True)  # the store directory may not exist on the first run
    for old_folder in os.listdir(store_dir):
        if old_folder.startswith(f'{uid}_') and old_folder != new_folder:
            src = os.path.join(store_dir, old_folder)
            dst = os.path.join(store_dir, new_folder)
            os.rename(src, dst)
            break

    return new_folder

--------------------------------------------------------------------------------
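A minimal sketch of how `prepare_folder` migrates an existing folder when a target has renamed themselves; the store directory and the uid/uname values below are made up:

```python
import os
import tempfile

from weibo.utils import prepare_folder

store = tempfile.mkdtemp()
os.mkdir(os.path.join(store, '42_old_name'))  # leftover folder from an earlier run

folder = prepare_folder('42', 'new_name', store)
print(folder)             # 42_new_name
print(os.listdir(store))  # ['42_new_name'] -- the old folder has been renamed
```
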
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2017 Yue Gao

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/weibo/spiders/image.py:
--------------------------------------------------------------------------------
import os.path
from urllib.parse import urlparse

import scrapy

from weibo import api, utils
from weibo import configs
from weibo.items import WeiboItem


class ImageSpider(scrapy.Spider):
    name = 'image'
    allowed_domains = ['weibo.com']

    def start_requests(self):
        for target in configs.TARGETS:
            uid = os.path.basename(urlparse(target.rstrip('/')).path)
            yield scrapy.Request(api.info(uid), callback=self.parse_info)

    def parse_info(self, response):
        # prepare data
        user = response.json()['user']
        uid, uname = user['id'], user['screen_name']

        # prepare user folder
        folder = utils.prepare_folder(uid, uname, configs.STORE_PATH)

        # start from the 1st page
        meta = {'uid': uid, 'folder': folder}
        yield scrapy.Request(api.get_image_wall(uid), callback=self.parse_image_wall, meta=meta)

    def parse_image_wall(self, response):
        # prepare data
        data = response.json()
        uid, folder = response.meta['uid'], response.meta['folder']

        # continue to next page
        since = data['since_id']
        yield scrapy.Request(api.get_image_wall(uid, since), callback=self.parse_image_wall, meta=response.meta)

        # yield all images
        self.logger.info(f'{folder} found {len(data["list"]):2d} images (from {response.url})')
        for image in data['list']:
            pid, mid = image['pid'], image['mid']
            filename = f'{folder}/{mid}_{pid}.jpg'
            yield WeiboItem(uuid=pid, filename=filename, file_urls=[api.large_image(pid)])

--------------------------------------------------------------------------------
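For orientation, a minimal illustration of the fields `parse_image_wall` reads once `WeiboAPIMiddleware` has unwrapped the API's `data` envelope; the values are invented and real responses contain many more keys:

```python
# Invented example; only the keys the spider actually consumes are shown.
image_wall_data = {
    'since_id': '4890000000000000',  # fed into the next getImageWall request
    'list': [
        {'pid': '006x1aB2ly1abcdefg', 'mid': '4890123456789012'},  # one photo per entry
    ],
}
```
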
/weibo/pipelines.py:
--------------------------------------------------------------------------------
import os
import pickle

from scrapy.exceptions import DropItem
from scrapy.pipelines.files import FilesPipeline


class WeiboMediaPipeline(FilesPipeline):
    """
    Pipeline to download media files with the filename specified on the item.
    """

    def file_path(self, request, response=None, info=None, *, item=None):
        return item['filename']


class BaseMediaKeyCachePipeline(object):
    """
    Base class for key-cache pipelines.

    These pipelines cache the keys of downloaded images / videos, so the files will
    not be downloaded again even if you have deleted them locally.
    Useful if you want to delete unwanted files for good.
    """

    # whether to load the existing key cache on startup
    preload_cache: bool

    def __init__(self, cache_file: str):
        self.cache_file = cache_file
        self.keys_seen = self.load_cache() if self.preload_cache else set()

    @classmethod
    def from_crawler(cls, crawler):
        return cls(crawler.settings['CACHE_FILE'])

    def load_cache(self):
        cache = set()
        if os.path.exists(self.cache_file):
            with open(self.cache_file, 'rb') as fp:
                cache = pickle.load(fp)
        return cache


class MediaKeyDuplicatesPipeline(BaseMediaKeyCachePipeline):
    """
    Pipeline to drop items whose keys are already cached.
    Must be placed BEFORE the downloading pipelines.
    """

    # preload the cache to check duplicates
    preload_cache = True

    def process_item(self, item, spider):
        if item['uuid'] in self.keys_seen:
            raise DropItem('Duplicate media key found in item.')
        return item


class MediaKeyCachePipeline(BaseMediaKeyCachePipeline):
    """
    Pipeline to cache the keys of newly downloaded items.
    Must be placed AFTER the downloading pipelines.
    """

    # no need to preload the cache just to update it
    preload_cache = False

    def close_spider(self, spider):
        cache = self.keys_seen | self.load_cache()
        with open(self.cache_file, 'wb') as fp:
            pickle.dump(cache, fp)

    def process_item(self, item, spider):
        if item['files']:
            self.keys_seen.add(item['uuid'])
        return item

--------------------------------------------------------------------------------
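Because the key cache is just a pickled `set`, it can be inspected or pruned outside of Scrapy. A small sketch, assuming the default `CACHE_FILE` from `configs.py` and a made-up key:

```python
import pickle

from weibo import configs

# Load the keys written by MediaKeyCachePipeline.
with open(configs.CACHE_FILE, 'rb') as fp:
    keys = pickle.load(fp)
print(f'{len(keys)} media keys cached')

# Forget one key so that its media will be downloaded again on the next crawl.
keys.discard('006x1aB2ly1abcdefg')
with open(configs.CACHE_FILE, 'wb') as fp:
    pickle.dump(keys, fp)
```
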
/weibo/spiders/video.py:
--------------------------------------------------------------------------------
import os.path
from urllib.parse import urlparse

import scrapy

from weibo import api, utils
from weibo import configs
from weibo.items import WeiboItem


class VideoSpider(scrapy.Spider):
    name = 'video'
    allowed_domains = ['weibo.com']
    download_warnsize = 100 << 20  # 100 MB
    download_timeout = 10 * 60  # 10 min
    video_keys = ['mp4_720p_mp4', 'mp4_hd_url', 'mp4_sd_url']  # preferred resolutions, best first

    def start_requests(self):
        for target in configs.TARGETS:
            uid = os.path.basename(urlparse(target.rstrip('/')).path)
            yield scrapy.Request(api.info(uid), callback=self.parse_info)

    def parse_info(self, response):
        # prepare data
        user = response.json()['user']
        uid, uname = user['id'], user['screen_name']

        # prepare user folder
        folder = utils.prepare_folder(uid, uname, configs.STORE_PATH)

        # start from the first page
        meta = {'uid': uid, 'folder': folder}
        yield scrapy.Request(api.get_water_fall(uid), callback=self.parse_water_fall, meta=meta)

    def parse_water_fall(self, response):
        # prepare data
        data = response.json()
        uid, folder = response.meta['uid'], response.meta['folder']

        # continue to next page
        cursor = data['next_cursor']
        yield scrapy.Request(api.get_water_fall(uid, cursor), callback=self.parse_water_fall, meta=response.meta)

        # yield all videos
        for video in data['list']:
            video, mid = video['page_info'], video['mid']
            video_type = video['object_type']

            match video_type:
                case 'video':
                    # pick the best resolution that is actually present
                    urls = [video['media_info'].get(key) for key in self.video_keys]
                    urls = [url for url in urls if url]
                    url = urls[0] if urls else ''
                    self.logger.info(f'{folder} found 1 video (from {response.url})')
                    yield WeiboItem(uuid=mid, filename=f'{folder}/{mid}.mp4', file_urls=[url])

                case 'story':
                    for i, slide in enumerate(video['slide_cover']['slide_videos']):
                        url = slide['url']
                        yield WeiboItem(uuid=f'{mid}_{i}', filename=f'{folder}/{mid}_{i}.mp4', file_urls=[url])

                case _:
                    self.logger.warning('Unknown video type "%s".', video_type)

--------------------------------------------------------------------------------
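Likewise, a minimal illustration of the two `page_info` shapes `parse_water_fall` handles; the values are invented and only the keys the spider reads are shown:

```python
# Invented example covering both object types handled above.
water_fall_data = {
    'next_cursor': '2',  # fed into the next getWaterFallContent request
    'list': [
        {  # a regular video post
            'mid': '4890123456789012',
            'page_info': {
                'object_type': 'video',
                'media_info': {'mp4_720p_mp4': 'https://example.invalid/720p.mp4'},
            },
        },
        {  # a "story" post consisting of several short slides
            'mid': '4890123456789013',
            'page_info': {
                'object_type': 'story',
                'slide_cover': {'slide_videos': [{'url': 'https://example.invalid/slide_0.mp4'}]},
            },
        },
    ],
}
```
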
/weibo/settings.py:
--------------------------------------------------------------------------------
# Scrapy settings for weibo project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
#     https://docs.scrapy.org/en/latest/topics/settings.html
#     https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
#     https://docs.scrapy.org/en/latest/topics/spider-middleware.html
from weibo import configs

BOT_NAME = 'weibo'

SPIDER_MODULES = ['weibo.spiders']
NEWSPIDER_MODULE = 'weibo.spiders'


# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'weibo (+http://www.yourdomain.com)'

# Obey robots.txt rules
ROBOTSTXT_OBEY = False

# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32

# Configure a delay for requests for the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
#DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16

# Disable cookies (enabled by default)
#COOKIES_ENABLED = False

# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False

# Override the default request headers:
#DEFAULT_REQUEST_HEADERS = {
#   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
#   'Accept-Language': 'en',
#}

# Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
#    'weibo.middlewares.WeiboSpiderMiddleware': 543,
#}

# Enable or disable downloader middlewares
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
DOWNLOADER_MIDDLEWARES = {
    'weibo.middlewares.CustomCookiesMiddleware': 543,
    'weibo.middlewares.WeiboAPIMiddleware': 543,
}

# Enable or disable extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
#EXTENSIONS = {
#    'scrapy.extensions.telnet.TelnetConsole': None,
#}

# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
    'weibo.pipelines.MediaKeyDuplicatesPipeline': 300,
    'weibo.pipelines.WeiboMediaPipeline': 310,
    'weibo.pipelines.MediaKeyCachePipeline': 320,
}
FILES_STORE = configs.STORE_PATH
CACHE_FILE = configs.CACHE_FILE

LOG_FORMATTER = 'weibo.utils.LogFormatter'

# Enable and configure the AutoThrottle extension (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False

# Enable and configure HTTP caching (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'

--------------------------------------------------------------------------------
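The tuning knobs mentioned in the README correspond to the commented defaults above; one possible, conservative override (example values only) could look like:

```python
# Example overrides for weibo/settings.py -- values are illustrative, not prescriptive.
CONCURRENT_REQUESTS = 8      # keep the request rate modest
DOWNLOAD_DELAY = 1           # wait a second between requests to the same site
AUTOTHROTTLE_ENABLED = True  # let Scrapy adapt the delay to server latency
```
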
/weibo/middlewares.py:
--------------------------------------------------------------------------------
# Define here the models for your spider middleware
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
from http.cookies import SimpleCookie
from json import JSONDecodeError

from scrapy import signals
from scrapy.exceptions import IgnoreRequest

from weibo import configs


# useful for handling different item types with a single interface


class WeiboSpiderMiddleware:
    # Not all methods need to be defined. If a method is not defined,
    # scrapy acts as if the spider middleware does not modify the
    # passed objects.

    @classmethod
    def from_crawler(cls, crawler):
        # This method is used by Scrapy to create your spiders.
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

    def process_spider_input(self, response, spider):
        # Called for each response that goes through the spider
        # middleware and into the spider.

        # Should return None or raise an exception.
        return None

    def process_spider_output(self, response, result, spider):
        # Called with the results returned from the Spider, after
        # it has processed the response.

        # Must return an iterable of Request, or item objects.
        for i in result:
            yield i

    def process_spider_exception(self, response, exception, spider):
        # Called when a spider or process_spider_input() method
        # (from other spider middleware) raises an exception.

        # Should return either None or an iterable of Request or item objects.
        pass

    def process_start_requests(self, start_requests, spider):
        # Called with the start requests of the spider, and works
        # similarly to the process_spider_output() method, except
        # that it doesn’t have a response associated.

        # Must return only requests (not items).
        for r in start_requests:
            yield r

    def spider_opened(self, spider):
        spider.logger.info('Spider opened: %s' % spider.name)


class WeiboDownloaderMiddleware:
    # Not all methods need to be defined. If a method is not defined,
    # scrapy acts as if the downloader middleware does not modify the
    # passed objects.

    @classmethod
    def from_crawler(cls, crawler):
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

    def process_request(self, request, spider):
        # Called for each request that goes through the downloader
        # middleware.

        # Must either:
        # - return None: continue processing this request
        # - or return a Response object
        # - or return a Request object
        # - or raise IgnoreRequest: process_exception() methods of
        #   installed downloader middleware will be called
        return None

    def process_response(self, request, response, spider):
        # Called with the response returned from the downloader.

        # Must either:
        # - return a Response object
        # - return a Request object
        # - or raise IgnoreRequest
        return response

    def process_exception(self, request, exception, spider):
        # Called when a download handler or a process_request()
        # (from other downloader middleware) raises an exception.

        # Must either:
        # - return None: continue processing this exception
        # - return a Response object: stops process_exception() chain
        # - return a Request object: stops process_exception() chain
        pass

    def spider_opened(self, spider):
        spider.logger.info('Spider opened: %s' % spider.name)


class CustomCookiesMiddleware(object):
    """
    Add custom cookies to each request.
    """

    def __init__(self):
        cookies = SimpleCookie()
        cookies.load(configs.COOKIES)
        self.cookies = {k: m.value for k, m in cookies.items()}

    def process_request(self, request, spider):
        request.cookies = self.cookies


class WeiboAPIMiddleware(object):
    """
    Extract the "data" field of api responses.
    """

    def process_response(self, request, response, spider):
        if 'ajax' in response.url:
            try:
                json = response.json()
            except JSONDecodeError:
                raise IgnoreRequest(f'Cookie expired or API changed: cannot parse json from {response.url}')

            if json.get('ok') != 1 or 'data' not in json:
                raise IgnoreRequest(f'API {response.url} returns invalid data: {json}')

            response._cached_decoded_json = json['data']

        return response

--------------------------------------------------------------------------------
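To see what `CustomCookiesMiddleware` does with the `COOKIES` string from `configs.py`, here is the same `SimpleCookie` conversion in isolation; the cookie names and values are placeholders:

```python
from http.cookies import SimpleCookie

# Placeholder string in the same 'key=value; key=value' form as configs.COOKIES.
raw = 'SUB=abc123; SUBP=def456'

cookies = SimpleCookie()
cookies.load(raw)
print({k: m.value for k, m in cookies.items()})  # {'SUB': 'abc123', 'SUBP': 'def456'}
```
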
/.gitignore:
--------------------------------------------------------------------------------
# Private configs
weibo/configs.py

# Created by https://www.toptal.com/developers/gitignore/api/python,pycharm+all
# Edit at https://www.toptal.com/developers/gitignore?templates=python,pycharm+all

### PyCharm+all ###
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839

# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf

# AWS User-specific
.idea/**/aws.xml

# Generated files
.idea/**/contentModel.xml

# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml

# Gradle
.idea/**/gradle.xml
.idea/**/libraries

# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr

# CMake
cmake-build-*/

# Mongo Explorer plugin
.idea/**/mongoSettings.xml

# File-based project format
*.iws

# IntelliJ
out/

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Cursive Clojure plugin
.idea/replstate.xml

# SonarLint plugin
.idea/sonarlint/

# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties

# Editor-based Rest Client
.idea/httpRequests

# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser

### PyCharm+all Patch ###
# Ignores the whole .idea folder and all .iml files
# See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360

.idea/*

# Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023

*.iml
modules.xml
.idea/misc.xml
*.ipr

# Sonarlint plugin
.idea/sonarlint

### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintainted in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# End of https://www.toptal.com/developers/gitignore/api/python,pycharm+all

--------------------------------------------------------------------------------