├── imgs
    ├── n
    └── image.png
├── README.md
├── Csgo compare.py
├── SteamCsgo.py
└── BuffCsgo.py


/imgs/n:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/imgs/image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/badiaog/crwal-Csgo-steam-buff/HEAD/imgs/image.png


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Csgo饰品 Buff商城及Steam市场的部分数据爬取
 2 | Python所写,爬虫小白的练手,代码有不少不合理的地方,望各位大佬指正
 3 | 主要为了方便朋友找到合适饰品去以折扣价"充值Steam余额"来买游戏
 4 | ## 现在steam市场从外部获取的价格不一定是最低价，点进去可能价格较高的排在第一位，有空再改
 5 | ## 获取到的字段
 6 | ### Buff商城
 7 | - 饰品名称
 8 | - Buff饰品在售数量
 9 | - Buff饰品价格
10 | 
11 | ### Steam市场
12 | - 饰品名称
13 | - Steam饰品在售数量
14 | - Steam饰品价格
15 | 
16 | ## 需要传入的信息
17 | ### Buff爬虫
18 | - category:所需要爬取的类目 Buff将手枪机枪等分类 如pistol shotgun 等
19 | - save_file_path:所需要存储的路径 后缀需是.csv
20 | - _: Buff商城接口请求中携带的毫秒级时间戳参数,初始值可以在浏览器抓包工具的XHR请求中获取(每成功获取一页后,程序会用当前时间自动更新该值)
21 | - price_range:价格区间宽度 最低价固定为50 若填写200 则价格区间为50-250 400则为50-450
22 | - cookie:另外需要在初始化请求头的函数中填入你在Buff的cookie
23 | 
24 | ### Steam爬虫
25 | - start:从第几个商品开始爬取 已设置为价格升序 6500差不多是1.5刀
26 | - save_file_path:所需要存储的路径 后缀需是.csv
27 | - page_num:要爬多少页 每页已设置为100个商品
28 | 不需要Cookie 但可能需要梯子
29 | 
30 | ## 可获得结果
31 | - 将获取的数据通过简单的分析后,筛选出Buff与Steam在售数量均>=100的饰品,并根据当前美元汇率得出最终的倒卖比,再以倒卖比升序进行排序输出DataFrame
32 | - 图示为2021/2/26日
33 | ![image](https://github.com/badiaog/crwal-Csgo-steam-buff/blob/main/imgs/image.png)
34 | 
35 | - 其中steam的饰品价格为美元 
36 | - Steam当前可获收益的计算公式为:steam饰品价格 * 当天美元汇率 * 0.85(steam卖出需15%手续费)
37 | - 倒卖比即Buff当前价格 / Steam当前可获收益 可以理解为可以以多少折扣购入steam余额
38 | 
39 | 
40 | - 声明:此代码仅个人小白学习练手,代码多有不合理之处望各位指点
41 | 


--------------------------------------------------------------------------------
/Csgo compare.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import re
 3 | import requests
 4 | from lxml import etree
 5 | 
 6 | df_steam = pd.read_csv('./CsgoSteam.csv')
 7 | df_buffrifle = pd.read_csv('./rifle.csv')
 8 | df_buffsmg = pd.read_csv('./smg.csv')
 9 | df_buffshotgun = pd.read_csv('./shotgun.csv')
10 | df_buffmachinegun = pd.read_csv('./machinegun.csv')
11 | #拼接buff的数据
12 | df_buff = pd.read_csv('./pistol.csv')
13 | df_buff = pd.concat((df_buff,df_buffrifle,df_buffshotgun,df_buffsmg,df_buffmachinegun))
14 | df_buff = df_buff[df_buff['Buff当前在售数量']>=100] #筛选大于100在售
15 | #只选择steam中在售数量>=100的
16 | df_steam = df_steam[df_steam['当前在售数量']>=100]
17 | 
18 | #横向拼接两组数据中相同名称的行
19 | df = pd.merge(df_steam,df_buff,how='outer')
20 | df = df.dropna()
21 | df.drop_duplicates()
22 | 
23 | #自定义函数以找出steam当前价格中的数字
def find_nums(s):
    """Return the first number contained in ``s`` as a string.

    ``s`` is converted with ``str()`` first, so non-string values are accepted.
    Commas are treated as thousands separators only when every comma is
    followed by exactly three digits ("$1,234.56 USD" -> "1234.56"); without
    this the price would be truncated to "1".

    Returns ``None`` when ``s`` contains no digits instead of raising
    ``IndexError``, so a later ``pd.to_numeric(..., errors='coerce')`` can
    turn the value into NaN.
    """
    # First alternative: comma-grouped number; second: the original pattern.
    match = re.search(r"\d{1,3}(?:,\d{3})+(?!\d)(?:\.\d+)?|\d+\.?\d*", str(s))
    if match is None:
        return None
    return match.group(0).replace(",", "")
# Pull the numeric part out of every Steam price string, then convert the
# column to numbers; anything that cannot be converted becomes NaN.
df['饰品价格'] = pd.to_numeric(df['饰品价格'].map(find_nums), errors='coerce')
28 | 
29 | #获取当前美元汇率
def get_rate():
    """Fetch the USD/CNY exchange rate shown on huilv.cc and return it as a float.

    Raises:
        requests.RequestException: the request failed, timed out, or the
            server answered with an HTTP error status.
        ValueError: the expected element was not found in the page, or its
            text is not a number.
    """
    url = 'https://www.huilv.cc/USD_CNY/'
    # A timeout keeps the script from hanging forever on an unresponsive site.
    response = requests.get(url=url, timeout=10)
    # Fail here on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    tree = etree.HTML(response.text)
    nodes = tree.xpath('//*[@id="main"]/div[1]/div[2]/span[1]/text()') if tree is not None else []
    if not nodes:
        raise ValueError('exchange rate element not found at ' + url)
    return float(nodes[0].strip())
36 | 
37 | #得出倒卖比并排序
rate = get_rate()
# Steam proceeds in CNY: USD price * exchange rate * 0.85
# (the README states that Steam keeps a 15% fee on each sale).
df['steam当前可获得收益'] = df['饰品价格'] * rate * 0.85
# Resale ratio: Buff price divided by the Steam proceeds (lower is better).
df['倒卖比'] = df['Buff当前价格'] / df['steam当前可获得收益']
# sort_values()/drop_duplicates() return new frames; the original discarded
# the result, so a plain script run produced nothing. Keep and print it.
df = df.sort_values(by='倒卖比').drop_duplicates()
print(df)


--------------------------------------------------------------------------------
/SteamCsgo.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import time
 3 | import requests
 4 | import pandas as pd
 5 | import random
 6 | from fake_useragent import UserAgent
 7 | from lxml import etree
 8 | from urllib.parse import urlencode
 9 | 
10 | 
class SteamCsgo:
    """Crawl CS:GO (appid 730) listings from the Steam Community Market.

    Pages of 100 listings, sorted by ascending price, are requested from the
    market ``search/render`` endpoint. Every fetched page is parsed and
    appended to a CSV file immediately.
    """

    # Column order of the CSV output.
    CSV_COLUMNS = ['饰品名称', '饰品价格', '当前在售数量']
    # Number of listings requested per page.
    PAGE_SIZE = 100
    # Seconds to wait for a response before the request is abandoned.
    REQUEST_TIMEOUT = 30
    # Translation table that deletes CR, LF and TAB characters.
    _STRIP_CONTROL = str.maketrans('', '', '\r\n\t')

    def __init__(self, start, save_file_path, page_num):
        """Store the crawl settings.

        Args:
            start: offset of the first listing to fetch.
            save_file_path: path of the CSV file the results are appended to.
            page_num: number of pages to fetch.
        """
        self.start = start
        self.page_num = page_num
        self.save_file_path = save_file_path
        # Rows parsed from the current page and not yet written to disk.
        self.item_datas = []
        # Common prefix of every search request URL.
        self.base_url = 'https://steamcommunity.com/market/search/render/?query=&'

    def get_page(self):
        """Fetch ``page_num`` pages and append each one to the CSV file.

        A page answered with a non-200 status or a non-JSON body is reported
        and skipped. A request that still fails after the connection retries
        aborts the crawl. Always returns ``None``.
        """
        count = 0
        # One session for the whole crawl, closed by the context manager.
        # Connection retries are configured through a transport adapter;
        # assigning ``requests.DEFAULT_RETRIES`` has no effect on requests.
        with requests.Session() as session:
            retrying_adapter = requests.adapters.HTTPAdapter(max_retries=5)
            session.mount('https://', retrying_adapter)
            session.mount('http://', retrying_adapter)

            for page in range(self.page_num):
                params = {
                    'start': self.start + self.PAGE_SIZE * page,
                    'count': self.PAGE_SIZE,
                    'search_descriptions': 0,
                    'sort_column': 'price',
                    'sort_dir': 'asc',
                    'appid': 730,
                }
                current_url = self.base_url + urlencode(params)
                try:
                    res = session.get(
                        url=current_url,
                        headers=self.init_headers(),
                        timeout=self.REQUEST_TIMEOUT,
                    )
                except requests.RequestException as e:
                    # Covers connection errors and timeouts alike.
                    print(e)
                    # 1-based, to match the page number of the success message.
                    print('{}页获取失败'.format(page + 1))
                    return None

                if res.status_code != 200:
                    print('失败')
                    continue

                try:
                    page_info = res.json()
                except ValueError:
                    # A 200 response whose body is not JSON.
                    print('失败')
                    continue

                count = count + 1
                if count % 10 == 0:
                    # Long pause after every tenth successful page.
                    time.sleep(60)
                print('已成功获取第{}页'.format(page + 1))
                self.parse_page(page_info)
                self.save_to_csv()
                # Random pause of up to 15 seconds between pages.
                time.sleep(random.random() * 15)
        return None

    def parse_page(self, page_info):
        """Extract name, price and listing count from one response.

        Args:
            page_info: decoded JSON response; the listing markup is read from
                its ``results_html`` entry.

        Rows are appended to ``self.item_datas``. A listing that lacks any of
        the three fields is skipped, and a response without markup adds nothing.
        """
        page_html = (page_info or {}).get('results_html') or ''
        page_html = page_html.translate(self._STRIP_CONTROL)
        if not page_html.strip():
            return
        tree = etree.HTML(page_html)
        if tree is None:
            return

        for row in tree.xpath('//a[@class="market_listing_row_link"]'):
            name = row.xpath('.//span[@class="market_listing_item_name"]/text()')
            price = row.xpath('.//span[@class="normal_price"]/text()')
            quantity = row.xpath('.//span[@class="market_listing_num_listings_qty"]/@data-qty')
            if not (name and price and quantity):
                continue
            self.item_datas.append({
                '饰品名称': name[0],        # item name
                '饰品价格': price[0],       # price text as shown in the listing
                '当前在售数量': quantity[0],  # number of current listings
            })

    def save_to_csv(self):
        """Append the buffered rows to the CSV file and clear the buffer.

        The header row is written only when the file does not exist yet or
        is empty.
        """
        df = pd.DataFrame(self.item_datas).reindex(columns=self.CSV_COLUMNS)
        file_has_content = (
            os.path.exists(self.save_file_path)
            and os.path.getsize(self.save_file_path) > 0
        )
        if file_has_content:
            df.to_csv(self.save_file_path, mode='a', encoding='utf-8', header=False, index=False)
        else:
            df.to_csv(self.save_file_path, mode='a', encoding='utf-8', index=False)
            print('已创建' + self.save_file_path)
        self.item_datas = []

    def init_headers(self):
        """Return request headers with a freshly randomised User-Agent."""
        return {
            'User-Agent': UserAgent().random,
            'Accept-Language': 'zh-CN',
        }
87 | 
88 | 
if __name__ == '__main__':
    # Crawl 60 pages beginning at listing offset 6500 and store the rows
    # in ./CsgoSteam.csv.
    S = SteamCsgo(start=6500, save_file_path='./CsgoSteam.csv', page_num=60)
    S.get_page()
92 | 


--------------------------------------------------------------------------------
/BuffCsgo.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import time
  3 | import requests
  4 | import pandas as pd
  5 | import random
  6 | from fake_useragent import UserAgent
  7 | from urllib.parse import urlencode
  8 | 
  9 | 
 10 | class BuffCsgo:
 11 |     def __init__(self, category, save_file_path, _, price_range):
 12 |         # 时间戳
 13 |         self._ = _
 14 |         # 类目
 15 |         self.category = category
 16 |         # 存储位置
 17 |         self.save_file_path = save_file_path
 18 |         # 价格区间 从50起步
 19 |         self.price_range = price_range
 20 |         # 一个临时的存储当前页信息的列表
 21 |         self.item_datas = []
 22 |         # 不变的url
 23 |         self.base_url = 'https://buff.163.com/api/market/goods?'
 24 |     # 获取当前时间戳
 25 |     def get_current_time(self):
 26 |         return round(time.time()*1000)
 27 | 
 28 |     # 获取页数
 29 |     def get_total_page(self):
 30 |         params = {
 31 |             'game': 'csgo',
 32 |             'page_num': 1,
 33 |             'category_group': self.category,
 34 |             'min_price': 50,
 35 |             'max_price': 50 + self.price_range,
 36 |             '_': self._
 37 |         }
 38 |         url = self.base_url + urlencode(params)
 39 |         try:
 40 |             response = requests.get(url=url, headers=self.init_headers(), proxies=self.random_ip(), timeout=10)
 41 |             if response.status_code == 200:
 42 |                 page_text = response.json()
 43 |                 if page_text.get('data'):
 44 |                     total_page = page_text.get('data').get('total_page')
 45 |                     return total_page
 46 |         except requests.ConnectionError as e:
 47 |             print("wrong in collecting total_page")
 48 | 
 49 |     def get_page(self):
 50 |          for page in range(1,self.get_total_page()+1):
 51 |          #for page in range(1, 4):  # 测试
 52 |             params = {
 53 |                 'game': 'csgo',
 54 |                 'page_num': page,
 55 |                 'category_group': self.category,
 56 |                 'min_price': 50,
 57 |                 'max_price': 50 + self.price_range,
 58 |                 '_': self._
 59 |             }
 60 |             current_url = self.base_url + urlencode(params)
 61 |             try:
 62 |                 response = requests.get(url=current_url, headers=self.init_headers(), proxies = self.random_ip(),timeout=10)
 63 |                 if response.status_code == 200:
 64 |                     print(f'已获取第{page}页')
 65 |                     self._ = self.get_current_time()
 66 |                     page_text = response.json()
 67 |                     self.parse_page(page_text)
 68 |                     self.save_to_csv()
 69 |                     time.sleep(random.random() * 8)
 70 |             except requests.ConnectionError as e:
 71 |                 print('获取失败')
 72 | 
 73 |     def parse_page(self, page_text):
 74 |         if page_text.get('data').get('items'):
 75 |             for item in page_text.get('data').get('items'):
 76 |                 info = {}
 77 |                 info['饰品名称'] = item.get('name')
 78 |                 info['Buff当前价格'] = item.get('quick_price')
 79 |                 info['Buff当前在售数量'] = item.get('sell_num')
 80 |                 self.item_datas.append(info)
 81 | 
 82 |     def init_headers(self):
 83 |         cookie = 'yourcookie' # 输入你自己的cookie
 84 |         headers = {
 85 |             'User-Agent': UserAgent().random,
 86 |             'Cookie': cookie
 87 |         }
 88 |         return headers
 89 | 
 90 |     # 随机取ip
 91 |     def random_ip(self):
 92 |         proxies = [
 93 |             '120.232.150.110:80',
 94 |             '106.45.221.69:3256',
 95 |             '47.98.208.18:8080',
 96 |             '117.24.80.59:3256',
 97 |             '111.179.73.203:3256',
 98 |             '47.95.178.212:3128',
 99 |             '125.87.84.82:3256',
100 |             '47.98.179.39:8080',
101 |             '116.62.113.142:1081',
102 |             '114.215.172.136:31280',
103 |             '47.98.183.59:3128',
104 |             '118.194.242.184:80',
105 |             '114.67.108.243:8081',
106 |             '120.232.150.100:80'
107 |         ]
108 |         proxy = {
109 |             'http': 'http://' + random.choice(proxies)
110 |         }
111 |         return proxy
112 | 
113 |     # 存储到csv
114 |     def save_to_csv(self):
115 |         df = pd.DataFrame(self.item_datas)
116 |         df = df.reindex(columns=['饰品名称', 'Buff当前价格', 'Buff当前在售数量'])
117 |         if os.path.exists(self.save_file_path) and os.path.getsize(self.save_file_path):
118 |             df.to_csv(self.save_file_path, mode='a', encoding='utf-8', header=None, index=False)
119 |         else:
120 |             df.to_csv(self.save_file_path, mode='a', encoding='utf-8', index=False)
121 |             print('已创建' + self.save_file_path)
122 |         self.item_datas = []
123 | 
124 | 
# Crawl each weapon category into its own CSV file
# (knives, gloves and stickers are not crawled).
for category in ['pistol', 'rifle', 'smg', 'shotgun', 'machinegun']:
    save_file_path = f'{category}.csv'
    B = BuffCsgo(
        category=category,
        save_file_path=save_file_path,
        _=1614323440986,
        price_range=200,
    )
    total_page = B.get_total_page()
    print(f'当前{category}类目共有{total_page}页')
    B.get_page()
131 | 


--------------------------------------------------------------------------------