├── BuffCsgo.py ├── Csgo compare.py ├── README.md ├── SteamCsgo.py └── imgs ├── image.png └── n /BuffCsgo.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import requests 4 | import pandas as pd 5 | import random 6 | from fake_useragent import UserAgent 7 | from urllib.parse import urlencode 8 | 9 | 10 | class BuffCsgo: 11 | def __init__(self, category, save_file_path, _, price_range): 12 | # 时间戳 13 | self._ = _ 14 | # 类目 15 | self.category = category 16 | # 存储位置 17 | self.save_file_path = save_file_path 18 | # 价格区间 从50起步 19 | self.price_range = price_range 20 | # 一个临时的存储当前页信息的列表 21 | self.item_datas = [] 22 | # 不变的url 23 | self.base_url = 'https://buff.163.com/api/market/goods?' 24 | # 获取当前时间戳 25 | def get_current_time(self): 26 | return round(time.time()*1000) 27 | 28 | # 获取页数 29 | def get_total_page(self): 30 | params = { 31 | 'game': 'csgo', 32 | 'page_num': 1, 33 | 'category_group': self.category, 34 | 'min_price': 50, 35 | 'max_price': 50 + self.price_range, 36 | '_': self._ 37 | } 38 | url = self.base_url + urlencode(params) 39 | try: 40 | response = requests.get(url=url, headers=self.init_headers(), proxies=self.random_ip(), timeout=10) 41 | if response.status_code == 200: 42 | page_text = response.json() 43 | if page_text.get('data'): 44 | total_page = page_text.get('data').get('total_page') 45 | return total_page 46 | except requests.ConnectionError as e: 47 | print("wrong in collecting total_page") 48 | 49 | def get_page(self): 50 | for page in range(1,self.get_total_page()+1): 51 | #for page in range(1, 4): # 测试 52 | params = { 53 | 'game': 'csgo', 54 | 'page_num': page, 55 | 'category_group': self.category, 56 | 'min_price': 50, 57 | 'max_price': 50 + self.price_range, 58 | '_': self._ 59 | } 60 | current_url = self.base_url + urlencode(params) 61 | try: 62 | response = requests.get(url=current_url, headers=self.init_headers(), proxies = self.random_ip(),timeout=10) 63 | if response.status_code == 200: 64 | print(f'已获取第{page}页') 65 | self._ = self.get_current_time() 66 | page_text = response.json() 67 | self.parse_page(page_text) 68 | self.save_to_csv() 69 | time.sleep(random.random() * 8) 70 | except requests.ConnectionError as e: 71 | print('获取失败') 72 | 73 | def parse_page(self, page_text): 74 | if page_text.get('data').get('items'): 75 | for item in page_text.get('data').get('items'): 76 | info = {} 77 | info['饰品名称'] = item.get('name') 78 | info['Buff当前价格'] = item.get('quick_price') 79 | info['Buff当前在售数量'] = item.get('sell_num') 80 | self.item_datas.append(info) 81 | 82 | def init_headers(self): 83 | cookie = 'yourcookie' # 输入你自己的cookie 84 | headers = { 85 | 'User-Agent': UserAgent().random, 86 | 'Cookie': cookie 87 | } 88 | return headers 89 | 90 | # 随机取ip 91 | def random_ip(self): 92 | proxies = [ 93 | '120.232.150.110:80', 94 | '106.45.221.69:3256', 95 | '47.98.208.18:8080', 96 | '117.24.80.59:3256', 97 | '111.179.73.203:3256', 98 | '47.95.178.212:3128', 99 | '125.87.84.82:3256', 100 | '47.98.179.39:8080', 101 | '116.62.113.142:1081', 102 | '114.215.172.136:31280', 103 | '47.98.183.59:3128', 104 | '118.194.242.184:80', 105 | '114.67.108.243:8081', 106 | '120.232.150.100:80' 107 | ] 108 | proxy = { 109 | 'http': 'http://' + random.choice(proxies) 110 | } 111 | return proxy 112 | 113 | # 存储到csv 114 | def save_to_csv(self): 115 | df = pd.DataFrame(self.item_datas) 116 | df = df.reindex(columns=['饰品名称', 'Buff当前价格', 'Buff当前在售数量']) 117 | if os.path.exists(self.save_file_path) and os.path.getsize(self.save_file_path): 118 | df.to_csv(self.save_file_path, mode='a', encoding='utf-8', header=None, index=False) 119 | else: 120 | df.to_csv(self.save_file_path, mode='a', encoding='utf-8', index=False) 121 | print('已创建' + self.save_file_path) 122 | self.item_datas = [] 123 | 124 | 125 | for category in ['pistol', 'rifle', 'smg', 'shotgun', 'machinegun']: #刀拳套贴花没爬: 126 | save_file_path = '{}.csv'.format(category) 127 | B = BuffCsgo(category, save_file_path, 1614323440986, 200) 128 | total_page = B.get_total_page() 129 | print(f'当前{category}类目共有{total_page}页') 130 | B.get_page() 131 | -------------------------------------------------------------------------------- /Csgo compare.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import re 3 | import requests 4 | from lxml import etree 5 | 6 | df_steam = pd.read_csv('./CsgoSteam.csv') 7 | df_buffrifle = pd.read_csv('./rifle.csv') 8 | df_buffsmg = pd.read_csv('./smg.csv') 9 | df_buffshotgun = pd.read_csv('./shotgun.csv') 10 | df_buffmachinegun = pd.read_csv('./machinegun.csv') 11 | #拼接buff的数据 12 | df_buff = pd.read_csv('./pistol.csv') 13 | df_buff = pd.concat((df_buff,df_buffrifle,df_buffshotgun,df_buffsmg,df_buffmachinegun)) 14 | df_buff = df_buff[df_buff['Buff当前在售数量']>=100] #筛选大于100在售 15 | #只选择steam中在售数量>=100的 16 | df_steam = df_steam[df_steam['当前在售数量']>=100] 17 | 18 | #横向拼接两组数据中相同名称的行 19 | df = pd.merge(df_steam,df_buff,how='outer') 20 | df = df.dropna() 21 | df.drop_duplicates() 22 | 23 | #自定义函数以找出steam当前价格中的数字 24 | def find_nums(s): 25 | return re.findall(r"\d+\.?\d*",str(s))[0] 26 | df['饰品价格'] = df['饰品价格'].map(find_nums) 27 | df["饰品价格"] = pd.to_numeric(df["饰品价格"],errors='coerce') 28 | 29 | #获取当前美元汇率 30 | def get_rate(): 31 | url = 'https://www.huilv.cc/USD_CNY/' 32 | response = requests.get(url = url).text 33 | tree = etree.HTML(response) 34 | rate = float(tree.xpath('//*[@id="main"]/div[1]/div[2]/span[1]/text()')[0]) 35 | return rate 36 | 37 | #得出倒卖比并排序 38 | rate = get_rate() 39 | df['steam当前可获得收益'] = df['饰品价格']*rate*0.85 40 | df['倒卖比'] = df['Buff当前价格'] / df['steam当前可获得收益'] 41 | df.sort_values(by = '倒卖比').drop_duplicates() -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Csgo饰品 Buff商城及Steam市场的部分数据爬取 2 | Python所写,爬虫小白的练手,代码有不少不合理的地方,望各位大佬指正 3 | 主要为了方便朋友找到合适饰品去以折扣价"充值Steam余额"来买游戏 4 | ## 现在steam市场从外部获取的价格不一定是最低价,点进去可能价格较高的排在第一位,有空再改 5 | ## 获取到的字段 6 | ### Buff商城 7 | - 饰品名称 8 | - Buff饰品在售数量 9 | - Buff饰品价格 10 | 11 | ### Steam市场 12 | - 饰品名称 13 | - Steam饰品在售数量 14 | - Steam饰品价格 15 | 16 | ## 需要传入的信息 17 | ### Buff爬虫 18 | - category:所需要爬取的类目 Buff将手枪机枪等分类 如pistol shotgun 等 19 | - save_file_path:所需要存储的路径 后缀需是.csv 20 | - _: Buff商城 类似时间戳的玩意 不太清楚这东西,可以在浏览器抓包工具XHR中获取 21 | - price_range:价格区间 若填写200 则为10-210 400则为10-410 22 | - cookie:另外需要在初始化请求头的函数中填入你在Buff的cookie 23 | 24 | ### Steam爬虫 25 | - start:从第几个商品开始爬取 已设置为价格升序 6500差不多是1.5刀 26 | - save_file_path:所需要存储的路径 后缀需是.csv 27 | - page_num:要爬多少页 每页已设置为100个商品 28 | 不需要Cookie 但可能需要梯子 29 | 30 | ## 可获得结果 31 | - 将获取的数据通过简单的分析后,筛选出在售数量>100 且根据当前美元汇率得出最终的倒卖比,再以倒卖比升序进行排序输出DataFrame 32 | - 图示为2021/2/26日 33 | ![image](https://github.com/badiaog/crwal-Csgo-steam-buff/blob/main/imgs/image.png) 34 | 35 | - 其中steam的饰品价格为美元 36 | - Steam当前可获收益的计算公式为:steam饰品价格 * 当天美元汇率 * 0.85(steam卖出需15%手续费) 37 | - 倒卖比即Buff当前价格 / Steam当前可获收益 可以理解为可以以多少折扣购入steam余额 38 | 39 | 40 | - 声明:此代码仅个人小白学习练手,代码多有不合理之处望各位指点 41 | -------------------------------------------------------------------------------- /SteamCsgo.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import requests 4 | import pandas as pd 5 | import random 6 | from fake_useragent import UserAgent 7 | from lxml import etree 8 | from urllib.parse import urlencode 9 | 10 | 11 | class SteamCsgo: 12 | def __init__(self, start, save_file_path, page_num): 13 | # 确认起始爬取值 第几个商品 14 | self.start = start 15 | # 确认要爬多少页 16 | self.page_num = page_num 17 | # 确认存储位置 18 | self.save_file_path = save_file_path 19 | # 初始化数据列表 20 | self.item_datas = [] 21 | # 定义url前头 22 | self.base_url = 'https://steamcommunity.com/market/search/render/?query=&' 23 | 24 | def get_page(self): 25 | count = 0 26 | for page in range(self.page_num): 27 | params = { 28 | 'start': self.start + 100 * page, 29 | 'count': 100, 30 | 'search_descriptions': 0, 31 | 'sort_column': 'price', 32 | 'sort_dir': 'asc', 33 | 'appid': 730 34 | } 35 | current_url = 'https://steamcommunity.com/market/search/render/?query=&' + urlencode(params) 36 | try: 37 | requests.DEFAULT_RETRIES = 5 # 增加重试连接次数 38 | s = requests.session() 39 | s.keep_alive = False # 关闭多余连接 40 | res = requests.get(url=current_url, headers=self.init_headers()) 41 | if res.status_code == 200: 42 | count = count + 1 43 | if count % 10 == 0: 44 | time.sleep(60) 45 | print('已成功获取第{}页'.format(page + 1)) 46 | page_info = res.json() 47 | self.parse_page(page_info) 48 | self.save_to_csv() 49 | time.sleep(random.random() * 15) 50 | else: 51 | print('失败') 52 | except requests.ConnectionError as e: 53 | print(e) 54 | print('{}页获取失败'.format(page)) 55 | return None 56 | 57 | def parse_page(self, page_info): 58 | page_html = page_info['results_html'].replace('\r', '').replace('\n', '').replace('\t', '') 59 | tree = etree.HTML(page_html) 60 | 61 | for i in tree.xpath('//a[@class="market_listing_row_link"]'): 62 | info = {} 63 | info['饰品名称'] = i.xpath('.//span[@class="market_listing_item_name"]/text()')[0] # 名称 64 | info['饰品价格'] = i.xpath('.//span[@class="normal_price"]/text()')[0] # 起价 65 | info['当前在售数量'] = i.xpath('.//span[@class="market_listing_num_listings_qty"]/@data-qty')[0] # 当前在售数量 66 | self.item_datas.append(info) 67 | 68 | # 存储到csv 69 | def save_to_csv(self): 70 | df = pd.DataFrame(self.item_datas) 71 | df = df.reindex(columns=['饰品名称', '饰品价格', '当前在售数量']) 72 | if os.path.exists(self.save_file_path) and os.path.getsize(self.save_file_path): 73 | df.to_csv(self.save_file_path, mode='a', encoding='utf-8', header=None, index=False) 74 | else: 75 | df.to_csv(self.save_file_path, mode='a', encoding='utf-8', index=False) 76 | print('已创建' + self.save_file_path) 77 | self.item_datas = [] 78 | 79 | # 生成随机ua 80 | def init_headers(self): 81 | headers = { 82 | 'User-Agent': UserAgent().random, 83 | 'Accept-Language': 'zh-CN', 84 | # 'Referer': 'https: // steamcommunity.com / market / search?appid = 730' 85 | } 86 | return headers 87 | 88 | 89 | if __name__ == '__main__': 90 | S = SteamCsgo(6500, './CsgoSteam.csv', 60) 91 | S.get_page() 92 | -------------------------------------------------------------------------------- /imgs/image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/badiaog/crwal-Csgo-steam-buff/e96cb11eb9dd5c265031a4a65a414e0371c93d83/imgs/image.png -------------------------------------------------------------------------------- /imgs/n: -------------------------------------------------------------------------------- 1 | 2 | --------------------------------------------------------------------------------