用户不存在
' in r.text: 71 | _logger.warning('User does not exist') 72 | self._user = _userDoesNotExist 73 | else: 74 | raise snscrape.base.ScraperException(f'Got unexpected response on resolving username ({r.status_code})') 75 | 76 | def _check_timeline_response(self, r): 77 | if r.status_code == 200 and r.content == b'{"ok":0,"msg":"\\u8fd9\\u91cc\\u8fd8\\u6ca1\\u6709\\u5185\\u5bb9","data":{"cards":[]}}': 78 | # 'No content here yet'. Appears to happen sometimes on pagination, possibly due to too fast requests; retry this 79 | return False, 'no-content message' 80 | if r.status_code != 200: 81 | return False, 'non-200 status code' 82 | return True, None 83 | 84 | def _mblog_to_item(self, mblog): 85 | if mblog.get('page_info', {}).get('type') not in (None, 'video', 'webpage'): 86 | _logger.warning(f'Skipping unknown page info {mblog["page_info"]["type"]!r} on status {mblog["id"]}') 87 | return Post( 88 | url = f'https://m.weibo.cn/status/{mblog["bid"]}', 89 | id = mblog['id'], 90 | user = self._user_info_to_entity(mblog['user']) if mblog['user'] is not None else None, 91 | createdAt = mblog['created_at'], 92 | text = mblog['raw_text'] if 'raw_text' in mblog else _HTML_STRIP_PATTERN.sub('', mblog['text']), 93 | repostsCount = mblog.get('reposts_count'), 94 | commentsCount = mblog.get('comments_count'), 95 | likesCount = mblog.get('attitudes_count'), 96 | picturesCount = mblog.get('pic_num'), 97 | pictures = [x['large']['url'] for x in mblog['pics']] if 'pics' in mblog else None, 98 | video = urls.get('mp4_720p_mp4') or urls.get('mp4_hd_mp4') or urls['mp4_ld_mp4'] if 'page_info' in mblog and mblog['page_info']['type'] == 'video' and (urls := mblog['page_info']['urls']) else None, 99 | link = mblog['page_info']['page_url'] if 'page_info' in mblog and mblog['page_info']['type'] == 'webpage' else None, 100 | repostedPost = self._mblog_to_item(mblog['retweeted_status']) if 'retweeted_status' in mblog else None, 101 | ) 102 | 103 | def get_items(self): 104 | self._ensure_user_id() 105 | if self._user is _userDoesNotExist: 106 | return 107 | sinceId = None 108 | while True: 109 | sinceParam = f'&since_id={sinceId}' if sinceId is not None else '' 110 | r = self._get(f'https://m.weibo.cn/api/container/getIndex?type=uid&value={self._user}&containerid=107603{self._user}&count=25{sinceParam}', headers = self._headers, responseOkCallback = self._check_timeline_response) 111 | if r.status_code != 200: 112 | raise snscrape.base.ScraperException(f'Got status code {r.status_code}') 113 | o = r.json() 114 | for card in o['data']['cards']: 115 | if card['card_type'] != 9: 116 | _logger.warning(f'Skipping card of type {card["card_type"]}') 117 | continue 118 | yield self._mblog_to_item(card['mblog']) 119 | if 'since_id' not in o['data']['cardlistInfo']: 120 | # End of pagination 121 | break 122 | sinceId = o['data']['cardlistInfo']['since_id'] 123 | 124 | def _user_info_to_entity(self, userInfo): 125 | return User( 126 | screenname = userInfo['screen_name'], 127 | uid = userInfo['id'], 128 | verified = userInfo['verified'], 129 | verifiedReason = userInfo.get('verified_reason'), 130 | description = userInfo['description'], 131 | statusesCount = userInfo['statuses_count'], 132 | followersCount = userInfo['followers_count'], 133 | followCount = userInfo['follow_count'], 134 | avatar = userInfo['avatar_hd'], 135 | ) 136 | 137 | def _get_entity(self): 138 | self._ensure_user_id() 139 | if self._user is _userDoesNotExist: 140 | return 141 | r = self._get(f'https://m.weibo.cn/api/container/getIndex?type=uid&value={self._user}', headers = self._headers) 142 | if r.status_code != 200: 143 | raise snscrape.base.ScraperException('Could not fetch user info') 144 | o = r.json() 145 | return self._user_info_to_entity(o['data']['userInfo']) 146 | 147 | @classmethod 148 | def _cli_setup_parser(cls, subparser): 149 | subparser.add_argument('--name', dest = 'isName', action = 'store_true', help = 'Use username instead of user ID') 150 | subparser.add_argument('user', type = snscrape.utils.nonempty_string_arg('user'), help = 'A user ID') 151 | 152 | @classmethod 153 | def _cli_from_args(cls, args): 154 | return cls._cli_construct(args, user = args.user if args.isName else int(args.user)) 155 | -------------------------------------------------------------------------------- /snscrape/utils.py: -------------------------------------------------------------------------------- 1 | def dict_map(input, keyMap): 2 | '''Return a new dict from an input dict and a {'input_key': 'output_key'} mapping''' 3 | 4 | return {outputKey: input[inputKey] for inputKey, outputKey in keyMap.items() if inputKey in input} 5 | 6 | 7 | def snake_to_camel(**kwargs): 8 | '''Return a new dict from kwargs with snake_case keys replaced by camelCase''' 9 | 10 | out = {} 11 | for key, value in kwargs.items(): 12 | keyParts = key.split('_') 13 | for i in range(1, len(keyParts)): 14 | keyParts[i] = keyParts[i][:1].upper() + keyParts[i][1:] 15 | out[''.join(keyParts)] = value 16 | return out 17 | 18 | 19 | def nonempty_string_arg(name): 20 | '''An argparse argument type factory for a non-empty string argument. The supplied `name` is used for the internal function name, resulting in better error messages.''' 21 | 22 | def f(s): 23 | s = s.strip() 24 | if s: 25 | return s 26 | raise ValueError('must not be an empty string') 27 | f.__name__ = name 28 | return f 29 | 30 | 31 | def module_deprecation_helper(all, **names): 32 | '''A helper function to generate the relevant module __getattr__ and __dir__ functions for handling deprecated names''' 33 | 34 | def __getattr__(name): 35 | if name in names: 36 | warnings.warn(f'{name} is deprecated, use {names[name].__name__} instead', DeprecatedFeatureWarning, stacklevel = 2) 37 | return names[name] 38 | raise AttributeError(f'module {__name__!r} has no attribute {name!r}') 39 | def __dir__(): 40 | return sorted(all + list(names.keys())) 41 | return __getattr__, __dir__ 42 | -------------------------------------------------------------------------------- /snscrape/version.py: -------------------------------------------------------------------------------- 1 | import importlib.metadata 2 | 3 | 4 | try: 5 | __version__ = importlib.metadata.version('snscrape') 6 | except importlib.metadata.PackageNotFoundError: 7 | __version__ = None 8 | --------------------------------------------------------------------------------