├── 7_1_Word2Vec.py ├── NegativeIndustry.py ├── NegativeNews2.py ├── README.md ├── data └── test ├── div_score.py ├── emotion.py ├── function.py ├── graph └── readme ├── medical2last.zip ├── news20200103.csv ├── news20200103_2.csv ├── news_industry.py ├── news_socre.py ├── news_stock.py ├── test1.py ├── test2.py ├── test3.py ├── test4.py ├── translatetxt.py ├── translatetxt2.py ├── 哈工大停用词表.txt ├── 投资决策流程及模型部署文件的生成.pdf └── 股票择时投资策略2.pdf /7_1_Word2Vec.py: -------------------------------------------------------------------------------- 1 | #%% 2 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | import collections 17 | import math 18 | import os 19 | import random 20 | import zipfile 21 | 22 | import numpy as np 23 | import urllib 24 | import tensorflow as tf 25 | 26 | #Step 1: Download the data. 27 | # url = 'http://mattmahoney.net/dc/' 28 | # 29 | # def maybe_download(filename, expected_bytes): 30 | # """Download a file if not present, and make sure it's the right size.""" 31 | # if not os.path.exists(filename): 32 | # filename, _ = urllib.request.urlretrieve(url + filename, filename) 33 | # statinfo = os.stat(filename) 34 | # if statinfo.st_size == expected_bytes: 35 | # print('Found and verified', filename) 36 | # else: 37 | # print(statinfo.st_size) 38 | # raise Exception( 39 | # 'Failed to verify ' + filename + '. Can you get to it with a browser?') 40 | # return filename 41 | 42 | #filename = maybe_download('text8.zip', 31344016) 43 | filename = 'medical2last.zip' 44 | 45 | # Read the data into a list of strings. 46 | def read_data(filename): 47 | """Extract the first file enclosed in a zip file as a list of words""" 48 | 49 | with zipfile.ZipFile(filename) as f: 50 | #f = open(filename, 'r', encoding=u'utf-8', errors='ignore') 51 | data = tf.compat.as_str(f.read(f.namelist()[0])).split() 52 | return data 53 | 54 | words = read_data(filename) 55 | 56 | #print('Data size', len(words)) 57 | 58 | # Step 2: Build the dictionary and replace rare words with UNK token. 59 | vocabulary_size = 5000 60 | 61 | def build_dataset(words): 62 | count = [['UNK', -1]] 63 | count.extend(collections.Counter(words).most_common(vocabulary_size - 1)) 64 | dictionary = dict() 65 | for word, _ in count: 66 | dictionary[word] = len(dictionary) 67 | data = list() 68 | unk_count = 0 69 | for word in words: 70 | if word in dictionary: 71 | index = dictionary[word] 72 | else: 73 | index = 0 # dictionary['UNK'] 74 | unk_count += 1 75 | data.append(index) 76 | count[0][1] = unk_count 77 | reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys())) 78 | return data, count, dictionary, reverse_dictionary 79 | 80 | data, count, dictionary, reverse_dictionary = build_dataset(words) 81 | del words # Hint to reduce memory. 
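# At this point in the script:
#   data               - the corpus encoded as integer word IDs (index 0 is 'UNK')
#   count              - (word, frequency) pairs for the vocabulary_size most frequent words
#   dictionary         - word -> ID lookup
#   reverse_dictionary - ID -> word lookup, used below when printing sample batches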
82 | print('Most common words (+UNK)', count[:5]) 83 | print('Sample data', data[:10], [reverse_dictionary[i] for i in data[:10]]) 84 | 85 | data_index = 0 86 | 87 | 88 | # Step 3: Function to generate a training batch for the skip-gram model. 89 | def generate_batch(batch_size, num_skips, skip_window): 90 | global data_index 91 | assert batch_size % num_skips == 0 92 | assert num_skips <= 2 * skip_window 93 | batch = np.ndarray(shape=(batch_size), dtype=np.int32) 94 | labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32) 95 | span = 2 * skip_window + 1 # [ skip_window target skip_window ] 96 | buffer = collections.deque(maxlen=span) 97 | for _ in range(span): 98 | buffer.append(data[data_index]) 99 | data_index = (data_index + 1) % len(data) 100 | for i in range(batch_size // num_skips): 101 | target = skip_window # target label at the center of the buffer 102 | targets_to_avoid = [ skip_window ] 103 | for j in range(num_skips): 104 | while target in targets_to_avoid: 105 | target = random.randint(0, span - 1) 106 | targets_to_avoid.append(target) 107 | batch[i * num_skips + j] = buffer[skip_window] 108 | labels[i * num_skips + j, 0] = buffer[target] 109 | buffer.append(data[data_index]) 110 | data_index = (data_index + 1) % len(data) 111 | return batch, labels 112 | 113 | batch, labels = generate_batch(batch_size=8, num_skips=2, skip_window=1) 114 | for i in range(8): 115 | print(batch[i], reverse_dictionary[batch[i]], 116 | '->', labels[i, 0], reverse_dictionary[labels[i, 0]]) 117 | 118 | # Step 4: Build and train a skip-gram model. 119 | 120 | batch_size = 128 121 | embedding_size = 128 # Dimension of the embedding vector. 122 | skip_window = 1 # How many words to consider left and right. 123 | num_skips = 2 # How many times to reuse an input to generate a label. 124 | 125 | # We pick a random validation set to sample nearest neighbors. Here we limit the 126 | # validation samples to the words that have a low numeric ID, which by 127 | # construction are also the most frequent. 128 | valid_size = 16 # Random set of words to evaluate similarity on. 129 | valid_window = 100 # Only pick dev samples in the head of the distribution. 130 | valid_examples = np.random.choice(valid_window, valid_size, replace=False) 131 | num_sampled = 64 # Number of negative examples to sample. 132 | 133 | 134 | graph = tf.Graph() 135 | with graph.as_default(): 136 | 137 | # Input data. 138 | train_inputs = tf.placeholder(tf.int32, shape=[batch_size]) 139 | train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1]) 140 | valid_dataset = tf.constant(valid_examples, dtype=tf.int32) 141 | 142 | # Ops and variables pinned to the CPU because of missing GPU implementation 143 | with tf.device('/cpu:0'): 144 | # Look up embeddings for inputs. 145 | embeddings = tf.Variable( 146 | tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0)) 147 | embed = tf.nn.embedding_lookup(embeddings, train_inputs) 148 | 149 | # Construct the variables for the NCE loss 150 | nce_weights = tf.Variable( 151 | tf.truncated_normal([vocabulary_size, embedding_size], 152 | stddev=1.0 / math.sqrt(embedding_size))) 153 | nce_biases = tf.Variable(tf.zeros([vocabulary_size])) 154 | 155 | # Compute the average NCE loss for the batch. 156 | # tf.nce_loss automatically draws a new sample of the negative labels each 157 | # time we evaluate the loss. 
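# In effect, alongside each batch of (centre word, context word) pairs the loss
# draws num_sampled random words from the vocabulary as negatives and trains the
# model to distinguish the true context words from that noise.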
158 | loss = tf.reduce_mean( 159 | tf.nn.nce_loss(weights=nce_weights, 160 | biases=nce_biases, 161 | labels=train_labels, 162 | inputs=embed, 163 | num_sampled=num_sampled, 164 | num_classes=vocabulary_size)) 165 | 166 | # Construct the SGD optimizer using a learning rate of 1.0. 167 | optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(loss) 168 | 169 | # Compute the cosine similarity between minibatch examples and all embeddings. 170 | norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True)) 171 | normalized_embeddings = embeddings / norm 172 | valid_embeddings = tf.nn.embedding_lookup( 173 | normalized_embeddings, valid_dataset) 174 | similarity = tf.matmul( 175 | valid_embeddings, normalized_embeddings, transpose_b=True) 176 | 177 | # Add variable initializer. 178 | init = tf.global_variables_initializer() 179 | 180 | # Step 5: Begin training. 181 | num_steps = 10001 182 | 183 | with tf.Session(graph=graph) as session: 184 | # We must initialize all variables before we use them. 185 | init.run() 186 | print("Initialized") 187 | 188 | average_loss = 0 189 | for step in range(num_steps): 190 | batch_inputs, batch_labels = generate_batch( 191 | batch_size, num_skips, skip_window) 192 | feed_dict = {train_inputs : batch_inputs, train_labels : batch_labels} 193 | 194 | # We perform one update step by evaluating the optimizer op (including it 195 | # in the list of returned values for session.run() 196 | _, loss_val = session.run([optimizer, loss], feed_dict=feed_dict) 197 | average_loss += loss_val 198 | 199 | if step % 2000 == 0: 200 | if step > 0: 201 | average_loss /= 2000 202 | # The average loss is an estimate of the loss over the last 2000 batches. 203 | print("Average loss at step ", step, ": ", average_loss) 204 | average_loss = 0 205 | 206 | # Note that this is expensive (~20% slowdown if computed every 500 steps) 207 | if step % 10000 == 0: 208 | sim = similarity.eval() 209 | for i in range(valid_size): 210 | valid_word = reverse_dictionary[valid_examples[i]] 211 | top_k = 8 # number of nearest neighbors 212 | nearest = (-sim[i, :]).argsort()[1:top_k+1] 213 | log_str = "Nearest to %s:" % valid_word 214 | for k in range(top_k): 215 | close_word = reverse_dictionary[nearest[k]] 216 | log_str = "%s %s," % (log_str, close_word) 217 | print(log_str) 218 | final_embeddings = normalized_embeddings.eval() 219 | 220 | # Step 6: Visualize the embeddings. 
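# final_embeddings holds the L2-normalised word vectors learned above; the code
# below projects the first plot_only (= 200, i.e. most frequent) of them into
# 2-D with t-SNE and saves a scatter plot to tsne.png.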
221 | 222 | def plot_with_labels(low_dim_embs, labels, filename='tsne.png'): 223 | assert low_dim_embs.shape[0] >= len(labels), "More labels than embeddings" 224 | plt.figure(figsize=(18, 18)) #in inches 225 | for i, label in enumerate(labels): 226 | x, y = low_dim_embs[i,:] 227 | plt.scatter(x, y) 228 | plt.annotate(label, 229 | xy=(x, y), 230 | xytext=(5, 2), 231 | textcoords='offset points', 232 | ha='right', 233 | va='bottom') 234 | 235 | plt.savefig(filename) 236 | 237 | #%% 238 | try: 239 | from sklearn.manifold import TSNE 240 | import matplotlib.pyplot as plt 241 | 242 | tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000) 243 | plot_only = 200 244 | low_dim_embs = tsne.fit_transform(final_embeddings[:plot_only,:]) 245 | labels = [reverse_dictionary[i] for i in range(plot_only)] 246 | plot_with_labels(low_dim_embs, labels) 247 | 248 | except ImportError: 249 | print("Please install sklearn, matplotlib, and scipy to visualize embeddings.") 250 | -------------------------------------------------------------------------------- /NegativeIndustry.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import math 4 | import sys 5 | import openpyxl 6 | 7 | from openpyxl import Workbook 8 | import win32com.client 9 | from win32com.client import Dispatch 10 | 11 | 12 | # wb = Workbook('data/股票负面^^^20191227.xlsx') 13 | # negativenews = wb.worksheets[0] 14 | negative = openpyxl.load_workbook('data/newdata_stock20200115_standard.xlsx') 15 | sheetsne = negative.sheetnames 16 | negativeinds = negative[sheetsne[4]] 17 | 18 | 19 | score = openpyxl.load_workbook('data/股票所属行业.xlsx') 20 | sheetssc = score.sheetnames 21 | sc = score[sheetssc[0]] 22 | 23 | File = open("data/negativeindustry20200115.txt", "w", encoding=u'utf-8', errors='ignore') 24 | File.write("股票名称"+"," + "所属行业" +"\n") 25 | 26 | i = 2 27 | j = 2 28 | for a in range(200): 29 | ne = negativeinds.cell(row=i, column=4).value 30 | for b in range(3635): 31 | ser = sc.cell(row=j, column=3).value 32 | j = j+1 33 | k = 1 34 | if ser==ne: 35 | File.write(str(sc.cell(row=j, column=2).value) + ","+ str(sc.cell(row=j, column=3).value) + ","+"\n") 36 | j = 2 37 | i = i + 1 38 | print("ok") 39 | 40 | 41 | -------------------------------------------------------------------------------- /NegativeNews2.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import math 4 | import sys 5 | import openpyxl 6 | 7 | from openpyxl import Workbook 8 | import win32com.client 9 | from win32com.client import Dispatch 10 | 11 | 12 | # wb = Workbook('data/股票负面^^^20191227.xlsx') 13 | # negativenews = wb.worksheets[0] 14 | #negative = openpyxl.load_workbook('data/negativenews20200106.xlsx') 15 | score = openpyxl.load_workbook('data/newdata_stock20200115_standard.xlsx') 16 | sheetssc = score.sheetnames 17 | sc = score[sheetssc[1]] 18 | 19 | sheetsne = score.sheetnames 20 | negativenews = score[sheetsne[3]] 21 | 22 | File = open("data/negativenews20200115.txt", "w", encoding=u'utf-8', errors='ignore') 23 | File.write("标题"+"," + "风险类别" +","+ "重要性" +","+ "证券代码" +","+ "证券简称" +","+ "公司全称"+","+ "来源" +","+ "时间"+"\n") 24 | 25 | i = 2 26 | j = 2 27 | for a in range(396): 28 | ne = negativenews.cell(row=i, column=6).value 29 | for b in range(200): 30 | ser = sc.cell(row=j, column=3).value 31 | j = j+1 32 | k = 1 33 | if ser==ne: 34 | File.write(str(negativenews.cell(row=i, column=2).value) + ","+ \ 35 | 
str(negativenews.cell(row=i, column=3).value) + "," + str(negativenews.cell(row=i, column=4).value)+ ","+ \ 36 | str(negativenews.cell(row=i, column=5).value) + "," + str(negativenews.cell(row=i, column=6).value)+ ","+ \ 37 | str(negativenews.cell(row=i, column=7).value) + "," + str(negativenews.cell(row=i, column=8).value) + "," + \ 38 | str(negativenews.cell(row=i, column=9).value) + "," + "\n") 39 | j = 2 40 | i = i + 1 41 | print("ok") 42 | 43 | 44 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Knowledge-Graph-实现步骤 2 | 1:投资策略构建步骤及思路见文件:股票择时投资策略2.pdf 3 | 2:选股模型及模型部署见:投资决策流程及模型部署文件的生成.pdf 4 | 3: 5 | 6 | 7 | 功能: 8 | 自动分析非结构化的实体,抽取三元组关系,并自动构建图谱。同时在关系的抽取上增加了关系的权重,使得构建的关系具备强弱关系,不仅能做知识推理还能做因果推理和逻辑推理, 9 | -------------------------------------------------------------------------------- /data/test: -------------------------------------------------------------------------------- 1 | test 2 | -------------------------------------------------------------------------------- /div_score.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import math 4 | import sys 5 | sys.path.append('E:/crystal-forcase/alpha-stock') 6 | import function 7 | big = float("inf") 8 | 9 | 10 | 11 | #读txt文件的代码 12 | csv_file = 'data/20200115_utf8.txt' 13 | f = open(csv_file, 'r', encoding=u'utf-8', errors='ignore') 14 | df = pd.read_csv(f, sep='\t') 15 | # df.dropna(inplace=True) 16 | # 读csv的代码 17 | # csv_file = 'data/newdata_stock20191203-13_standard.csv' 18 | # f = open(csv_file, 'r', encoding=u'utf-8', errors='ignore') 19 | # df = pd.read_csv(f) 20 | #print(df) 21 | 22 | 23 | #File = open("data/score.txt", "w") 24 | File = open("data/score20200115.txt", "w", encoding=u'utf-8', errors='ignore') 25 | File.write("id"+"," + "code" +","+ "name" +","+ "score" +"\n") 26 | 27 | for i in range(3659): 28 | incomev = df.iloc[i:i + 1, 8:9].values 29 | incomegrowthratev = df.iloc[i:i + 1, 20:21].values 30 | revenuesv = df.iloc[i:i + 1, 6:7].values 31 | revenuesgrowthratev = df.iloc[i:i + 1, 21:22].values 32 | ROEv = df.iloc[i:i + 1, 11:12].values 33 | pb=df.iloc[i:i+1, 5:6].values 34 | pe=df.iloc[i:i+1, 10:11].values 35 | Investmentincomev = df.iloc[i:i + 1, 19:20].values 36 | Persharereservev = df.iloc[i:i + 1, 13:14].values 37 | Netassetspersharev = df.iloc[i:i + 1, 7:8].values 38 | EPSv = df.iloc[i:i + 1, 4:5].values 39 | Operatingcashpersharev = df.iloc[i:i + 1, 11:12].values 40 | Currentliabilityv = df.iloc[i:i + 1, 14:15].values 41 | cashflowv = df.iloc[i:i + 1, 18:19].values 42 | stockholderequityv = df.iloc[i:i + 1, 16:17].values 43 | Shareholdersequityratiov = df.iloc[i:i + 1, 22:23].values 44 | operatingprofitv = df.iloc[i:i + 1, 9:10].values 45 | capitalsurplusv = df.iloc[i:i + 1, 17:18].values 46 | grossprofitratiov = df.iloc[i:i + 1, 23:24].values 47 | longtermdebtv = df.iloc[i:i + 1, 15:16].values 48 | volumeoftransactionv = df.iloc[i:i + 1, 27:28].values 49 | amountoftransactionv = df.iloc[i:i + 1, 28:29].values 50 | stagerangev = df.iloc[i:i + 1, 25:26].values 51 | KDJ_v = df.iloc[i:i + 1, 60:61].values 52 | RSI_v = df.iloc[i:i + 1, 59:60].values 53 | VAR33_v = df.iloc[i:i + 1, 30:31].values 54 | VAR35_v = df.iloc[i:i + 1, 32:33].values 55 | VAR36_v = df.iloc[i:i + 1, 33:34].values 56 | VAR37_v = df.iloc[i:i + 1, 34:35].values 57 | VAR38_v=df.iloc[i:i+1, 35:36].values 58 | VAR39_v=df.iloc[i:i+1, 
36:37].values 59 | VAR40_v=df.iloc[i:i+1, 37:38].values 60 | VAR41_v=df.iloc[i:i+1, 38:39].values 61 | VAR43_v=df.iloc[i:i+1, 40:41].values 62 | VAR45_v=df.iloc[i:i+1, 42:43].values 63 | VAR46_v=df.iloc[i:i+1, 43:44].values 64 | VAR48_v=df.iloc[i:i+1, 45:46].values 65 | VAR49_v = df.iloc[i:i + 1, 46:47].values 66 | VAR50_v = df.iloc[i:i + 1, 47:48].values 67 | VAR51_v = df.iloc[i:i + 1, 48:49].values 68 | VAR53_v = df.iloc[i:i + 1, 50:51].values 69 | VAR55_v = df.iloc[i:i + 1, 52:53].values 70 | VAR56_v = df.iloc[i:i + 1, 53:54].values 71 | VAR58_v = df.iloc[i:i + 1, 55:56].values 72 | VAR60_v = df.iloc[i:i + 1, 57:58].values 73 | VAR61_v = df.iloc[i:i + 1, 58:59].values 74 | 75 | print(i) 76 | #score=function.PB_1(pb)+function.PE_1(pe) 77 | score = function.income1(incomev) +function.incomegrowthrate1(incomegrowthratev)+function.revenues1(revenuesv) + \ 78 | function.revenuesgrowthrate1(revenuesgrowthratev)+\ 79 | function.ROE1(ROEv) + function.Investmentincome1(Investmentincomev) + function.Persharereserve1(Persharereservev) + \ 80 | function.Netassetspershare1(Netassetspersharev)+\ 81 | function.EPS1(EPSv) + function.Operatingcashpershare1(Operatingcashpersharev) + \ 82 | function.Currentliability1(Currentliabilityv) + function.cashflow1(cashflowv)+\ 83 | function.stockholderequity1(stockholderequityv) + function.Shareholdersequityratio1(Shareholdersequityratiov) + \ 84 | function.operatingprofit1(operatingprofitv) + function.capitalsurplus1(capitalsurplusv)+\ 85 | function.grossprofitratio1(grossprofitratiov) + function.longtermdebt1(longtermdebtv) + \ 86 | function.volumeoftransaction1(volumeoftransactionv) + function.amountoftransaction1(amountoftransactionv)+\ 87 | function.stagerange1(stagerangev) + function.KDJ_2(KDJ_v) + \ 88 | function.PB_1(pb)+function.PE_1(pe)+ function.RSI(RSI_v) + \ 89 | function.VAR33_1(VAR33_v)+\ 90 | function.VAR35_1(VAR35_v) + function.VAR36_1(VAR36_v) + function.VAR37_1(VAR37_v) + function.VAR38_1(VAR38_v)+\ 91 | function.VAR39_1(VAR39_v) + function.VAR40_1(VAR40_v) + function.VAR43_1(VAR43_v) + function.VAR45_1(VAR45_v)+\ 92 | function.VAR46_1(VAR46_v) + function.VAR48_1(VAR48_v) + function.VAR49_1(VAR49_v) + function.VAR50_1(VAR50_v)+\ 93 | function.VAR51_1(VAR51_v) + function.VAR53_1(VAR53_v) + function.VAR55_1(VAR55_v) + function.VAR56_1(VAR56_v)+\ 94 | function.VAR58_1(VAR58_v) + function.VAR60_1(VAR60_v) + function.VAR61_1(VAR61_v) 95 | 96 | #File.write(str(df.iloc[i:i+1, 0:1].values)+"," + str(df.iloc[i:i+1, 1:2].values)+"," + "\n") 97 | File.write(str(i)+"," + str(df.iloc[i:i+1, 0:1].values)+"," + str(df.iloc[i:i+1, 1:2].values) +"," + str(score) +"," + "\n") 98 | 99 | File.close() 100 | print("ok!!!") -------------------------------------------------------------------------------- /emotion.py: -------------------------------------------------------------------------------- 1 | 2 | from snownlp import SnowNLP 3 | 4 | 5 | text2 = '2019年拉美地区经济增速放缓' 6 | text1 = '大数据“杀熟”? 
鲜花包月质量忽上忽下的原因终于找到了,减薪裁员、旅客锐减……港媒:香港为春节经济忧愁' 7 | 8 | s1 = SnowNLP(text1) 9 | s2 = SnowNLP(text2) 10 | 11 | print(s1.sentiments,s2.sentiments) -------------------------------------------------------------------------------- /function.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | big = float("inf") 5 | 6 | def KDJ_2(KDJ_2): 7 | if not KDJ_2 is None: gb = 0.0001 8 | if KDJ_2<43.028: gb= 0.020408 9 | if KDJ_2>= 43.028 and KDJ_2 < 63.078: gb= 6.90263 10 | if KDJ_2>= 63.078 and KDJ_2 < 67.891: gb= 14.43635 11 | if KDJ_2>= 67.891 and KDJ_2 < 71.247: gb= 11.56308 12 | if KDJ_2>= 71.247 and KDJ_2 < 77.165: gb= 23.43814 13 | if KDJ_2>= 77.165 and KDJ_2 < 82.906: gb= 22.65665 14 | if KDJ_2>= 82.906 and KDJ_2 < 88.605: gb= 19.58754 15 | if KDJ_2 >= 88.605 and KDJ_2 < big: gb = 21.320709 16 | return gb 17 | 18 | 19 | def RSI(RSI): 20 | if not RSI is None: gb = 0.0001 21 | if RSI < 1.384: gb= 6.23399 22 | if RSI>= 1.384 and RSI < 30.405: gb= 7.864 23 | if RSI>= 30.405 and RSI < 99.759: gb= 11.9817 24 | if RSI>= 99.759 and RSI < big: gb= 0.020408 25 | return gb 26 | 27 | def VAR33_1(VAR33): 28 | if not VAR33 is None: gb = 0.0001 29 | if VAR33 < 0: gb= 3.84736 30 | if VAR33 >= 0 and VAR33 < 1: gb= 0.020408 31 | if VAR33 >= 1: gb = 0.0190408 32 | return gb 33 | 34 | 35 | def VAR35_1(VAR35): 36 | if not VAR35 is None: gb = 0.0001 37 | if VAR35 < 77319.0: gb= 24.5021 38 | if VAR35>= 77319.0 and VAR35 < 106120.0: gb= 24.13250 39 | if VAR35>= 106120.0 and VAR35 < 163609: gb= 14.6890 40 | if VAR35>= 163609 and VAR35 < 219444: gb= 17.6607 41 | if VAR35>= 219444 and VAR35 < 329607.0: gb= 4.4077 42 | if VAR35>= 329607.0 and VAR35 < 1491059: gb= 4.67171 43 | if VAR35>= 1491059 and VAR35 < 2147483647: gb= 0.02040 44 | if VAR35>= 2147483647 and VAR35 < big: gb= 0.019 45 | return gb 46 | 47 | 48 | def VAR36_1(VAR36): 49 | if not VAR36 is None: gb = 0.0001 50 | if VAR36 < 8779.51 : gb= 4.2437 51 | if VAR36>= 8779.51 and VAR36 < 12446.86 : gb= 2.7942 52 | if VAR36>= 12446.86 and VAR36 < 16709.87 : gb= 7.9176 53 | if VAR36>= 16709.87 and VAR36 < 22351.1 : gb= 11.7639 54 | if VAR36>= 22351.1 and VAR36 < 29362.59 : gb= 5.9812 55 | if VAR36>= 29362.59 and VAR36 < 43957.96 : gb= 1.15117 56 | if VAR36>= 43957.96 and VAR36 < 68756.27: gb= 4.7812 57 | if VAR36>= 68756.27 and VAR36 < 106871.38 : gb= 4.8037 58 | if VAR36>= 106871.38 and VAR36 < big : gb= 0.0204 59 | return gb 60 | 61 | 62 | def VAR37_1(VAR37): 63 | if not VAR37 is None: gb = 0.020408 64 | if VAR37 < big: gb= 2.11007 65 | return gb 66 | 67 | 68 | def VAR38_1(VAR38): 69 | if not VAR38 is None: gb = 1.70545 70 | if VAR38 < 0 : gb= 5.92976 71 | if VAR38>= 0 and VAR38 < big: gb= 0.020408 72 | return gb 73 | 74 | def VAR39_1(VAR39): 75 | if not VAR39 is None: gb = 0.020408 76 | if VAR39 < 1 : gb= 2.11007 77 | if VAR39 >= 1 : gb= 0.0204 78 | return gb 79 | 80 | 81 | def VAR40_1(VAR40): 82 | if not VAR40 is None : gb= 20.6475 83 | if VAR40>= 0 and VAR40 < 81311 : gb= 0.02040 84 | if VAR40>= 81311 and VAR40 < 160720 : gb=12.9211 85 | if VAR40>= 160720 and VAR40 < 257176 : gb=17.0822 86 | if VAR40>= 257176 and VAR40 < 333971 : gb=27.4035 87 | if VAR40>= 333971 and VAR40 < 499427 : gb=20.2264 88 | if VAR40>= 499427 and VAR40 < 716185 : gb=30.3518 89 | if VAR40>= 716185 and VAR40 < 1039738: gb=41.2039 90 | if VAR40>= 1039738 and VAR40 < big : gb=34.7442 91 | return gb 92 | 93 | 94 | def VAR41_1(VAR41): 95 | if not VAR41 is None: gb=20.0317 96 | if VAR41>= 0 and VAR41 < 11271.18 : 
gb=31.0293 97 | if VAR41>= 11271.18 and VAR41 < 17319.41 : gb=31.4851 98 | if VAR41>= 17319.41 and VAR41 < 22315.25 : gb=28.59064 99 | if VAR41>= 22315.25 and VAR41 < 28671.46 : gb=27.63547 100 | if VAR41>= 28671.46 and VAR41 < 36942.3 : gb=30.9357 101 | if VAR41>= 36942.3 and VAR41 < 50587.07 : gb=18.88754 102 | if VAR41>= 50587.07 and VAR41 < 71743.51 : gb=23.1686 103 | if VAR41>= 71743.51 and VAR41 < 171064 : gb=11.171407 104 | if VAR41>= 171064 and VAR41 < big : gb=0.020408 105 | return gb 106 | 107 | 108 | def VAR43_1(VAR43): 109 | if not VAR43 is None: gb = 0.0001 110 | if VAR43<0 : gb=6.14222 111 | if VAR43>= 0 and VAR43 <1: gb=0.020408 112 | if VAR43 >= 1: gb = 0.01925 113 | return gb 114 | 115 | 116 | def VAR45_1(VAR45): 117 | if not VAR45 is None: gb = 0.0001 118 | if VAR45>= 0 and VAR45 < 72020 : gb=13.3649 119 | if VAR45>= 72020 and VAR45 < 131110 : gb=4.43968 120 | if VAR45>= 131110 and VAR45 < 205731: gb=14.18636 121 | if VAR45>= 205731 and VAR45 < 294651.0 : gb=11.38658 122 | if VAR45>= 294651 and VAR45 < 452797 : gb=17.25659 123 | if VAR45>= 452797 and VAR45 < 763087 : gb=18.0357 124 | if VAR45>= 763087 and VAR45 < big : gb=0.02040 125 | return gb 126 | 127 | 128 | def VAR46_1(VAR46): 129 | if not VAR46 is None: gb = 0.0001 130 | if VAR46 < 7500.41: gb=11.21801 131 | if VAR46>= 7500.41 and VAR46 < 12116.53: gb=13.16524 132 | if VAR46>= 12116.53 and VAR46 < 20201.43: gb=9.23118 133 | if VAR46>= 20201.43 and VAR46 < 23772.09: gb=0.06967 134 | if VAR46>= 23772.09 and VAR46 < 29947.4 : gb=7.9699 135 | if VAR46>= 29947.4 and VAR46 < 37733.76 : gb=3.56138 136 | if VAR46>= 37733.76 and VAR46 < 47715.75: gb=6.6857 137 | if VAR46>= 47715.75 and VAR46 < 66729.84 : gb=5.4682 138 | if VAR46>= 66729.84 and VAR46 < 97586.22 : gb=0.020408 139 | if VAR46>= 97586.22 and VAR46 < 155363.81: gb=13.0273 140 | if VAR46 >= 155363.81 and VAR46 < big: gb = 9.8571 141 | return gb 142 | 143 | def VAR48_1(VAR48): 144 | if not VAR48 is None: gb= 0.020408 145 | if VAR48<0: gb= 1.10442 146 | if VAR48>=0 and VAR48 <1: gb= 0.17589 147 | if VAR48 >= 1: gb = 0.019589 148 | return gb 149 | 150 | 151 | def VAR49_1(VAR49): 152 | if not VAR49 is None: gb= 0.02040 153 | if VAR49 < 1 : gb= 0.8647 154 | if VAR49 >= 1: gb = 0.019 155 | return gb 156 | 157 | 158 | def VAR50_1(VAR50): 159 | if not VAR50 is None: gb= 10.2411 160 | if VAR50>= 0 and VAR50 < 88923 : gb= 3.77103 161 | if VAR50>= 88923 and VAR50 < 130928 : gb= 2.58228 162 | if VAR50>= 130928 and VAR50 < 184901 : gb= 13.17023 163 | if VAR50>= 184901 and VAR50 < 240270 : gb= 3.9062 164 | if VAR50>= 240270 and VAR50 < 324552 : gb= 7.9744 165 | if VAR50>= 324552 and VAR50 < 501716 : gb= 0.79015 166 | if VAR50>= 501716 and VAR50 < 710920 : gb= 0.020408 167 | if VAR50>= 710920 and VAR50 < 1136161 : gb= 1.167402 168 | if VAR50>= 1136161 and VAR50 < big: gb= 2.73416 169 | return gb 170 | 171 | 172 | def VAR51_1(VAR51): 173 | if not VAR51 is None: gb= 6.22334 174 | if VAR51>= 0 and VAR51 < 7423.08 : gb= 22.86327 175 | if VAR51>= 7423.08 and VAR51 < 11214.3 : gb= 15.30725 176 | if VAR51>= 11214.3 and VAR51 < 19171.22 : gb= 17.136336 177 | if VAR51>= 19171.22 and VAR51 < 25500.7 : gb= 7.82033 178 | if VAR51>= 25500.7 and VAR51 < 45908.92 : gb= 3.01746 179 | if VAR51>= 45908.92 and VAR51 < 76280.34 : gb= 4.3366 180 | if VAR51>= 76280.34 and VAR51 < 102100.6 : gb= 3.90378 181 | if VAR51>= 102100.6 and VAR51 < 165718.17 : gb=5.13207 182 | if VAR51>= 165718.17 and VAR51 < big : gb= 0.020408 183 | return gb 184 | 185 | def VAR53_1(VAR53): 186 | if not VAR53 is None: gb = 
0.0001 187 | if VAR53<0 : gb= 0.54369 188 | if VAR53>= 0 and VAR53 < 1: gb= 0.020408 189 | if VAR53 >= 1 : gb = 0.019408 190 | return gb 191 | 192 | 193 | def VAR55_1(VAR55): 194 | if not VAR55 is None: gb = 0.0001 195 | if VAR55>= 0 and VAR55 < 64980 : gb=1.3912 196 | if VAR55>= 64980 and VAR55 < 106045 : gb=10.97048 197 | if VAR55>= 106045 and VAR55 < 134082 : gb=1.147016 198 | if VAR55>= 134082 and VAR55 < 166444 : gb=4.27181 199 | if VAR55>= 166444 and VAR55 < 199383 : gb=0.020408 200 | if VAR55>= 199383 and VAR55 < 245264 : gb=0.54963 201 | if VAR55>= 245264 and VAR55 < 335063 : gb=13.57916 202 | if VAR55>= 335063 and VAR55 < 473830 : gb=22.27843 203 | if VAR55>= 473830 and VAR55 < 905185 : gb=20.370161 204 | if VAR55>= 905185 and VAR55 < 1583630 : gb=24.81614 205 | if VAR55>= 1583630 and VAR55 < big : gb=35.176292 206 | return gb 207 | 208 | 209 | def VAR56_1(VAR56): 210 | if not VAR56 is None: gb = 0.0001 211 | if VAR56 < 7222.94 : gb=33.01424 212 | if VAR56>= 7222.94 and VAR56 < 13759.4 : gb=36.35627 213 | if VAR56>= 13759.4 and VAR56 < 18704.85 : gb=20.1377 214 | if VAR56>= 18704.85 and VAR56 < 25932.25 : gb=28.41691 215 | if VAR56>= 25932.25 and VAR56 < 43917.25 : gb=18.19527 216 | if VAR56>= 43917.25 and VAR56 < 67007.26 : gb=11.99811 217 | if VAR56>= 67007.26 and VAR56 < 118839.54 : gb=6.55267 218 | if VAR56>= 118839.54 and VAR56 < big : gb=0.020408 219 | return gb 220 | 221 | 222 | def VAR58_1(VAR58): 223 | if not VAR58 is None: gb = 0.0001 224 | if VAR58<0: gb=0.020408 225 | if VAR58>= 0 and VAR58 < 1 : gb=2.824744 226 | if VAR58 >= 1: gb =0.01955 227 | return gb 228 | 229 | def VAR60_1(VAR60): 230 | if not VAR60 is None: gb = 0.0001 231 | if VAR60 < 9265.39 : gb=3.93215 232 | if VAR60>= 9265.39 and VAR60 < 14198 : gb=0.020408 233 | if VAR60>= 14198 and VAR60 < 18082 : gb=6.07631 234 | if VAR60>= 18082 and VAR60 < 31583 : gb=7.05776 235 | if VAR60>= 31583 and VAR60 < 46625 : gb=9.17736 236 | if VAR60>= 46625 and VAR60 < 66075 : gb=1.72721 237 | if VAR60>= 66075 and VAR60 < 95239 : gb=4.64637 238 | if VAR60>= 95239 and VAR60 < 126157 : gb=0.79226 239 | if VAR60>= 126157 and VAR60 < 252287 : gb=10.9973 240 | if VAR60>= 252287 and VAR60 < big: gb=7.46181 241 | return gb 242 | 243 | 244 | def VAR61_1(VAR61): 245 | if not VAR61 is None: gb = 0.0001 246 | if VAR61 < 1143.44 : gb=6.94237 247 | if VAR61>= 1143.44 and VAR61 < 1718.85 : gb=0.020408 248 | if VAR61>= 1718.85 and VAR61 < 2278.54 : gb=6.5346 249 | if VAR61>= 2278.54 and VAR61 < 2910.02 : gb=5.90171 250 | if VAR61>= 2910.02 and VAR61 < 4531.11 : gb=14.12564 251 | if VAR61>= 4531.11 and VAR61 < 9736.39 : gb=7.47721 252 | if VAR61>= 9736.39 and VAR61 < big : gb=8.79076 253 | return gb 254 | 255 | #主营收入 256 | def income1(income): 257 | if not income is None: gb = 0.0001 258 | if income < 50284: gb = 2.557202 259 | if income >= 50284 and income < 67710: gb = 1.54461 260 | if income >= 67710 and income < 84880: gb = 8.5168 261 | if income >= 84880 and income < 114529: gb = 0.020408 262 | if income >= 114529 and income < 148444: gb = 8.80206 263 | if income >= 148444 and income < 266865: gb = 7.00573 264 | if income >= 266865 and income < 647837: gb = 9.94524 265 | if income >= 647837 and income < 1204372: gb = 0.14144 266 | if income >= 1204372: gb = 6.49023 267 | return gb 268 | 269 | #主营收入同比 270 | def incomegrowthrate1(incomegrowthrate): 271 | if not incomegrowthrate is None: gb = 0.0001 272 | if incomegrowthrate < 3.248 : gb=4.071247 273 | if incomegrowthrate>= 3.248 and incomegrowthrate < 11.568 : gb=0.9817 274 | if 
incomegrowthrate>= 11.568 and incomegrowthrate < 17.018 : gb=6.95596 275 | if incomegrowthrate>= 17.018 and incomegrowthrate < 22.093 : gb=0.020408 276 | if incomegrowthrate>= 22.093 and incomegrowthrate < 41.593 : gb=2.27162 277 | if incomegrowthrate>= 41.593 and incomegrowthrate < big : gb=7.62120 278 | return gb 279 | 280 | #净利润 281 | def revenues1(revenues): 282 | if not revenues is None: gb = 0.0001 283 | if revenues < 411 : gb=26.83916 284 | if revenues>= 411 and revenues < 3079 : gb=7.50711 285 | if revenues>= 3079 and revenues < 5152 : gb=0.020408 286 | if revenues>= 5152 and revenues < 7530 : gb=8.39818 287 | if revenues>= 7530 and revenues < 10842 : gb=10.3772 288 | if revenues>= 10842 and revenues < 21707 : gb=1.14736 289 | if revenues>= 1.14736 and revenues < 31428 : gb=7.76754 290 | if revenues>= 31428 and revenues < 53050 : gb=1.69234 291 | if revenues>= 53050 and revenues < 100060 : gb=4.03374 292 | if revenues>= 100060 and revenues < big: gb=12.01157 293 | return gb 294 | 295 | #净利润同比 296 | def revenuesgrowthrate1(revenuesgrowthrate): 297 | if not revenuesgrowthrate is None: gb = 0.0001 298 | if revenuesgrowthrate<0: gb=11.43988 299 | if revenuesgrowthrate>= 0 and revenuesgrowthrate < 7.409: gb=11.62207 300 | if revenuesgrowthrate>= 7.409 and revenuesgrowthrate < 18.649: gb=0.020408 301 | if revenuesgrowthrate>= 18.649 and revenuesgrowthrate < 39.96: gb=8.35751 302 | if revenuesgrowthrate>= 39.96 and revenuesgrowthrate < 170.955: gb=10.91314 303 | if revenuesgrowthrate>= 170.955 and revenuesgrowthrate < big: gb=8.19601 304 | return gb 305 | 306 | #净资产收益率 307 | def ROE1(ROE): 308 | if not ROE is None: gb=6.37555 309 | if ROE<0: gb=6.21541 310 | if ROE>=0 and ROE < 1: gb=0.020408 311 | if ROE >= 1 : gb = 0.01908 312 | return gb 313 | 314 | #市净率 315 | def PB_1(pb): 316 | if not pb is None: gb = 0.020408 317 | if pb < 1.28: gb = 18.8844 318 | if pb >= 1.28 and pb < 1.52: gb = 21.428206 319 | if pb >= 1.52 and pb < 1.74: gb = 16.631003 320 | if pb >= 1.74 and pb < 2.19: gb = 20.34075 321 | if pb >= 2.19 and pb < 2.68: gb = 14.47257 322 | if pb >= 2.68 and pb < 3.48: gb = 29.04896 323 | if pb >= 3.48 and pb < 5.15: gb = 27.82325 324 | if pb < big: gb = 37.95336 325 | return gb 326 | 327 | #市盈率 328 | def PE_1(PE): 329 | if not PE is None: gb=0.020408 330 | if PE < 1 : gb=89.73698 331 | if PE>= 1 and PE < 14.35 : gb=20.73637 332 | if PE>=14.35 and PE < 17.48 : gb=14.0666 333 | if PE>= 17.48 and PE <20.69 : gb=10.25078 334 | if PE>= 20.69 and PE < 28.93 : gb=14.3363 335 | if PE>=28.93 and PE < 38.85 : gb=9.7639 336 | if PE>= 38.85 and PE < 54.79 : gb=22.97599 337 | if PE>=54.79 and PE < 112.89 : gb=15.97741 338 | if PE < big: gb=21.648014 339 | return gb 340 | 341 | #投资收益 342 | def Investmentincome1(Investmentincome): 343 | if not Investmentincome is None: gb = 0.18701 344 | if Investmentincome < 28: gb= 1.1904 345 | if Investmentincome>=28 and Investmentincome <120 : gb=0.020408 346 | if Investmentincome>=120 and Investmentincome <287 : gb=11.9747 347 | if Investmentincome>= 287 and Investmentincome <709 : gb=1.61483 348 | if Investmentincome>= 709 and Investmentincome <2317 : gb=3.827008 349 | if Investmentincome>= 2317 and Investmentincome < 7014 : gb=6.2092 350 | if Investmentincome>=7014 and Investmentincome < big : gb=5.35745 351 | return gb 352 | 353 | #每股公积金 354 | def Persharereserve1(Persharereserve): 355 | if not Persharereserve is None: gb=12.91906 356 | if Persharereserve < 1 : gb=8.041266 357 | if Persharereserve>= 1 and Persharereserve <1.25 : gb=3.038358 358 | if 
Persharereserve>= 1.25 and Persharereserve <1.736 : gb=10.59354 359 | if Persharereserve>= 1.736 and Persharereserve < 2.001 : gb=5.60317 360 | if Persharereserve>= 2.001 and Persharereserve < 2.332 : gb=6.54077 361 | if Persharereserve>=2.332 and Persharereserve < 2.647 : gb=3.06069 362 | if Persharereserve>= 2.647 and Persharereserve <3.67 : gb=4.45335 363 | if Persharereserve>= 3.67 and Persharereserve < 4.944 : gb=4.3002 364 | if Persharereserve>= 4.944 and Persharereserve < big: gb=0.020408 365 | return gb 366 | 367 | #每股净资产 368 | def Netassetspershare1(Netassetspershare): 369 | if not Netassetspershare is None: gb = 0.0001 370 | if Netassetspershare <1.967 : gb=0.020408 371 | if Netassetspershare>= 1.967 and Netassetspershare <2.828 : gb=4.2333 372 | if Netassetspershare>= 2.828 and Netassetspershare <3.28 : gb=0.6359 373 | if Netassetspershare>= 3.28 and Netassetspershare < 4.03 : gb=7.94902 374 | if Netassetspershare>= 4.03 and Netassetspershare < 4.628 : gb=12.5024 375 | if Netassetspershare>= 4.628 and Netassetspershare < 5.389 : gb=4.53942 376 | if Netassetspershare>= 5.389 and Netassetspershare < 6.368 : gb=9.01132 377 | if Netassetspershare>= 6.368 and Netassetspershare < 9.255 : gb=19.718074 378 | if Netassetspershare>= 9.255 and Netassetspershare < big : gb=31.99056 379 | return gb 380 | 381 | #每股收益 382 | def EPS1(EPS): 383 | if not EPS is None: gb=66.897076 384 | if EPS<0: gb=0.020408 385 | if EPS>= 0 and EPS < big : gb=101.96149 386 | return gb 387 | 388 | #每股经营现金 389 | def Operatingcashpershare1(Operatingcashpershare): 390 | if not Operatingcashpershare is None: gb = 0.0001 391 | if Operatingcashpershare<0 : gb=1.74793 392 | if Operatingcashpershare>= 0 and Operatingcashpershare <1: gb=3.479613 393 | if Operatingcashpershare>= 1 and Operatingcashpershare < big: gb=0.020408 394 | return gb 395 | 396 | #流动负债 397 | def Currentliability1(Currentliability): 398 | if not Currentliability is None : gb=0.020408 399 | if Currentliability < 17749: gb=42.657299 400 | if Currentliability>= 17749 and Currentliability < 41872 : gb= 31.37411 401 | if Currentliability>= 41872 and Currentliability < 67805 : gb=35.86446 402 | if Currentliability>= 67805 and Currentliability < 94403 : gb=28.40803 403 | if Currentliability>= 94403 and Currentliability < 118701 : gb=33.06355 404 | if Currentliability>= 118701 and Currentliability < 154040 : gb=22.87249 405 | if Currentliability>= 154040 and Currentliability < 293731 : gb=26.96313 406 | if Currentliability>= 293731 and Currentliability < big : gb=19.97311 407 | return gb 408 | 409 | #经营现金流量 410 | def cashflow1(cashflow): 411 | if not cashflow is None: gb = 0.0001 412 | if cashflow<1: gb=2.035205 413 | if cashflow >=1 and cashflow < 5567 : gb=0.24589 414 | if cashflow >= 5567 and cashflow <12232 : gb=4.45852 415 | if cashflow >= 12232 and cashflow < 18996 : gb=0.020408 416 | if cashflow >= 18996 and cashflow < 34002 : gb=1.02909 417 | if cashflow >= 34002 and cashflow < 82981 : gb=2.18616 418 | if cashflow >= 82981 and cashflow < big : gb=8.71717 419 | return gb 420 | 421 | #股东权益 422 | def stockholderequity1(stockholderequity): 423 | if not stockholderequity is None: gb = 0.0001 424 | if stockholderequity < 52676 : gb=2.19407 425 | if stockholderequity>= 52676 and stockholderequity <102078: gb=0.020408 426 | if stockholderequity>= 102078 and stockholderequity <144376: gb=8.1202 427 | if stockholderequity>= 144376 and stockholderequity < 181280: gb=10.86791 428 | if stockholderequity>= 181280 and stockholderequity <278681: gb=7.95683 429 | if 
stockholderequity>= 278681 and stockholderequity < 453098: gb=9.81188 430 | if stockholderequity>= 453098 and stockholderequity < 652440: gb=11.1213 431 | if stockholderequity>=652440 and stockholderequity <1230921: gb=11.12022 432 | if stockholderequity>=1230921 and stockholderequity < big : gb=15.01879 433 | return gb 434 | 435 | #股东权益比 436 | def Shareholdersequityratio1(Shareholdersequityratio): 437 | if not Shareholdersequityratio is None: gb = 0.0001 438 | if Shareholdersequityratio < 29.101 : gb=0.020408 439 | if Shareholdersequityratio>= 29.101 and Shareholdersequityratio < 37.261 : gb=12.81002 440 | if Shareholdersequityratio>= 37.261 and Shareholdersequityratio <45.082 : gb=10.53649 441 | if Shareholdersequityratio>= 45.082 and Shareholdersequityratio < 53.413 : gb=5.59867 442 | if Shareholdersequityratio>= 53.413 and Shareholdersequityratio <60.846 : gb=7.77582 443 | if Shareholdersequityratio>= 60.846 and Shareholdersequityratio < 65.61 : gb=5.17068 444 | if Shareholdersequityratio>= 65.61 and Shareholdersequityratio < 73.516 : gb=11.43853 445 | if Shareholdersequityratio>= 73.516 and Shareholdersequityratio < 86.425 : gb=1.13651 446 | if Shareholdersequityratio>= 86.425 and Shareholdersequityratio < big : gb=6.14852 447 | return gb 448 | 449 | #营业利润 450 | def operatingprofit1(operatingprofit): 451 | if not operatingprofit is None: gb = 0.0001 452 | if operatingprofit<1: gb=12.06823 453 | if operatingprofit>= 1 and operatingprofit < 2177 : gb=0.0204081 454 | if operatingprofit>= 2177 and operatingprofit < 3880 : gb=7.51031 455 | if operatingprofit>= 3880 and operatingprofit < 8680 : gb=7.701602 456 | if operatingprofit>= 8680 and operatingprofit < 15406 : gb=15.90353 457 | if operatingprofit>= 15406 and operatingprofit < 48966 : gb=13.67607 458 | if operatingprofit>= 48966 and operatingprofit = 15182 and capitalsurplus < 24162 : gb=3.06527 466 | if capitalsurplus>= 24162 and capitalsurplus < 42580 : gb=11.183809 467 | if capitalsurplus>= 42580 and capitalsurplus < 63141 : gb=22.89289 468 | if capitalsurplus>= 63141 and capitalsurplus < 83469 : gb=10.07029 469 | if capitalsurplus>= 83469 and capitalsurplus < 130700 : gb=16.21322 470 | if capitalsurplus>= 130700 and capitalsurplus < 192784 : gb=19.32758 471 | if capitalsurplus>= 192784 and capitalsurplus < 437172 : gb=19.224904 472 | if capitalsurplus>= 437172 and capitalsurplus < big: gb=18.28482 473 | return gb 474 | 475 | #销售毛利率 476 | def grossprofitratio1(grossprofitratio): 477 | if not grossprofitratio is None: gb = 0.0001 478 | if grossprofitratio < 14.93 : gb=13.35899 479 | if grossprofitratio>= 14.93 and grossprofitratio < 24.901 : gb=7.53092 480 | if grossprofitratio>= 24.901 and grossprofitratio < 29.444 : gb=0.657776 481 | if grossprofitratio>= 29.444 and grossprofitratio < 34.152 : gb=9.190005 482 | if grossprofitratio>= 34.152 and grossprofitratio < 38.541 : gb=0.020408 483 | if grossprofitratio>= 38.541 and grossprofitratio < 43.684 : gb=15.0763 484 | if grossprofitratio>= 43.684 and grossprofitratio < 61.368 : gb=7.81706 485 | if grossprofitratio>= 61.368 and grossprofitratio < big : gb=9.12169 486 | return gb 487 | 488 | #长期负债 489 | def longtermdebt1(longtermdebt): 490 | if not longtermdebt is None: gb = 0.0001 491 | if longtermdebt is None: gb=16.271027 492 | if longtermdebt < 1467 : gb=6.670008 493 | if longtermdebt>= 1467 and longtermdebt < 3329 : gb=0.020408 494 | if longtermdebt>= 3329 and longtermdebt < 12102 : gb=0.32268 495 | if longtermdebt>= 12102 and longtermdebt < 21541 : gb=3.54764 496 | if 
longtermdebt>= 21541 and longtermdebt < 60391 : gb=3.88658 497 | if longtermdebt>= 60391 and longtermdebt < 150453 : gb=4.770717 498 | if longtermdebt>= 150453 and longtermdebt = 72943 and volumeoftransaction < 99911.91 : gb=13.92472 506 | if volumeoftransaction >= 99911.91 and volumeoftransaction < 141196 : gb=3.67683 507 | if volumeoftransaction >= 141196 and volumeoftransaction < 213835 : gb=20.59017 508 | if volumeoftransaction >= 213835 and volumeoftransaction < 276842 : gb=18.98813 509 | if volumeoftransaction >= 276842 and volumeoftransaction < 346244 : gb=25.01875 510 | if volumeoftransaction >= 346244 and volumeoftransaction < 471774 : gb=19.66208 511 | if volumeoftransaction >= 471774 and volumeoftransaction < 989027 : gb=26.09589 512 | if volumeoftransaction >= 989027 and volumeoftransaction < 1484292 : gb=23.98660 513 | if volumeoftransaction >= 1484292 and volumeoftransaction < big: gb=35.2506 514 | return gb 515 | 516 | #阶段成交额 517 | def amountoftransaction1(amountoftransaction): 518 | if not amountoftransaction is None: gb = 0.0001 519 | if amountoftransaction < 7554 : gb=34.391644 520 | if amountoftransaction>= 7554 and amountoftransaction < 10711.1 : gb=26.6226 521 | if amountoftransaction>= 10711.1 and amountoftransaction < 17070.86 : gb=14.51471 522 | if amountoftransaction>= 17070.86 and amountoftransaction < 24375.01 : gb=22.23326 523 | if amountoftransaction>= 24375.01 and amountoftransaction < 44944.95: gb=15.27538 524 | if amountoftransaction>= 44944.95 and amountoftransaction < 85508.35 : gb=20.7352 525 | if amountoftransaction>= 85508.35 and amountoftransaction < 149157.36 : gb=9.90349 526 | if amountoftransaction>= 149157.36 and amountoftransaction < big: gb=0.020408 527 | return gb 528 | 529 | #阶段涨幅 530 | def stagerange1(stagerange): 531 | if not stagerange is None: gb = 0.0001 532 | if stagerange<0 : gb=2.79856 533 | if stagerange>= 0 and stagerange < 1: gb=0.020408 534 | if stagerange >= 1 : gb = 0.029 535 | return gb 536 | 537 | -------------------------------------------------------------------------------- /graph/readme: -------------------------------------------------------------------------------- 1 | readme 2 | -------------------------------------------------------------------------------- /medical2last.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crystal-tensor/Finance-Knowledge-Graph/6dff59528ffcc93549c2ae364f36eff30e031dc5/medical2last.zip -------------------------------------------------------------------------------- /news20200103.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crystal-tensor/Finance-Knowledge-Graph/6dff59528ffcc93549c2ae364f36eff30e031dc5/news20200103.csv -------------------------------------------------------------------------------- /news20200103_2.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crystal-tensor/Finance-Knowledge-Graph/6dff59528ffcc93549c2ae364f36eff30e031dc5/news20200103_2.csv -------------------------------------------------------------------------------- /news_industry.py: -------------------------------------------------------------------------------- 1 | import re 2 | import csv 3 | import pandas as pd 4 | import tushare as ts 5 | 6 | # 创建“行业_股票”的关系 7 | stockList = [] 8 | data = pd.read_csv("myJob1/kg/stock.csv") 9 | for indexs in data.index: 10 | id = data.loc[indexs].values[0] 11 | code = 
str(data.loc[indexs].values[2]).zfill(6) 12 | print(id, code) 13 | stockList.append([id, code]) 14 | industryList = [] 15 | data = pd.read_csv("myJob1/kg/industry.csv") 16 | for indexs in data.index: 17 | id = data.loc[indexs].values[0] 18 | industry = data.loc[indexs].values[1] 19 | print(id, industry) 20 | industryList.append([id, industry]) 21 | stock_industry_list = [] 22 | data = pd.read_csv("myJob1/stock_industry_prep.csv") 23 | for indexs in data.index: 24 | stock = str(data.loc[indexs].values[0]).zfill(6) 25 | industry = data.loc[indexs].values[2] 26 | stock_id = "" 27 | for j in range(len(stockList)): 28 | if stock == stockList[j][1]: 29 | stock_id = stockList[j][0] 30 | break 31 | industry_id = "" 32 | for j in range(len(industryList)): 33 | if industry == industryList[j][1]: 34 | industry_id = industryList[j][0] 35 | break 36 | if stock_id!="" and industry_id!="": 37 | stock_industry_list.append([stock_id, industry_id, "行业属于", "行业属于"]) 38 | # 存储关系 39 | with open("myJob1/kg/news_industry.csv","w") as csvfile: 40 | writer = csv.writer(csvfile) 41 | writer.writerow([":START_ID", ":END_ID", "relation", ":TYPE"]) 42 | for i in range(len(stock_industry_list)): 43 | writer.writerows([stock_industry_list[i]]) 44 | print(i, stock_industry_list[i]) 45 | -------------------------------------------------------------------------------- /news_socre.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # coding: utf-8 3 | import numpy as np 4 | import pandas as pd 5 | import jieba 6 | from sklearn.feature_extraction.text import CountVectorizer 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.naive_bayes import MultinomialNB 9 | 10 | #csv_file = 'data1.csv' 11 | csv_file = 'news20200103_2.csv' 12 | f = open(csv_file, 'r', encoding="gbk", errors='ignore') ##encoding=u'utf-8', encoding='gbk' 13 | #f = open(csv_file, 'r', encoding="ISO-8859-1", errors='ignore') 14 | data = pd.read_csv(f) 15 | data.head() 16 | #t=pd.DataFrame(data['comment'].astype(str)) 17 | #print(t) 18 | #data = pd.read_csv('news20200103_2.csv',encoding="unicode_escape") 19 | 20 | #print(data.head()) 21 | def chinese_word_cut(mytext): 22 | return " ".join(jieba.cut(mytext)) 23 | data['cut_comment'] = data.comment.apply(chinese_word_cut) 24 | X = data['cut_comment'] 25 | print(X) 26 | y = data.sentiment 27 | 28 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=22) 29 | 30 | def get_custom_stopwords(stop_words_file): 31 | with open(stop_words_file) as f: 32 | stopwords = f.read() 33 | stopwords_list = stopwords.split('\n') 34 | custom_stopwords_list = [i for i in stopwords_list] 35 | return custom_stopwords_list 36 | 37 | stop_words_file = '哈工大停用词表.txt' 38 | 39 | stopwords = get_custom_stopwords(stop_words_file) 40 | 41 | vect = CountVectorizer(max_df = 0.8, 42 | min_df = 3, 43 | token_pattern=u'(?u)\\b[^\\d\\W]\\w+\\b', 44 | stop_words=frozenset(stopwords)) 45 | #print(vect) 46 | test = pd.DataFrame(vect.fit_transform(X_train).toarray(), columns=vect.get_feature_names()) 47 | test.head() 48 | 49 | nb = MultinomialNB() 50 | 51 | X_train_vect = vect.fit_transform(X_train) 52 | 53 | nb.fit(X_train_vect, y_train) 54 | train_score = nb.score(X_train_vect, y_train) 55 | print('准确率',train_score) 56 | 57 | # X_test_vect = vect.transform(X_test) 58 | # print(nb.score(X_test_vect, y_test)) 59 | 60 | X_vec = vect.transform(X) 61 | nb_result = nb.predict(X_vec) 62 | data['nb_result'] = nb_result 63 | 64 | 65 | 
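news_socre.py stops after attaching the Naive-Bayes prediction to the DataFrame; nothing is written to disk. A minimal follow-up sketch for persisting the scored news is shown below; the output file name is an assumption, not part of the repository.

# Hypothetical final step for news_socre.py: save the scored rows so later
# scripts can join on them. The output path is an assumption.
data[['comment', 'sentiment', 'nb_result']].to_csv(
    'news20200103_scored.csv', index=False, encoding='utf-8-sig')
print('saved', len(data), 'rows with nb_result')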
-------------------------------------------------------------------------------- /news_stock.py: -------------------------------------------------------------------------------- 1 | import re 2 | import csv 3 | import pandas as pd 4 | import tushare as ts 5 | 6 | # 创建“概念_股票”的关系 7 | stockList = [] 8 | data = pd.read_csv("myJob1/kg/stock.csv") 9 | for indexs in data.index: 10 | id = data.loc[indexs].values[0] 11 | code = str(data.loc[indexs].values[2]).zfill(6) 12 | print(id, code) 13 | stockList.append([id, code]) 14 | conceptList = [] 15 | data = pd.read_csv("myJob1/kg/concept.csv") 16 | for indexs in data.index: 17 | id = data.loc[indexs].values[0] 18 | concept = data.loc[indexs].values[1] 19 | print(id, concept) 20 | conceptList.append([id, concept]) 21 | stock_concept_list = [] 22 | data = pd.read_csv("myJob1/stock_concept_prep.csv") 23 | for indexs in data.index: 24 | stock = str(data.loc[indexs].values[0]).zfill(6) 25 | concept = data.loc[indexs].values[2] 26 | stock_id = "" 27 | for j in range(len(stockList)): 28 | if stock == stockList[j][1]: 29 | stock_id = stockList[j][0] 30 | break 31 | concept_id = "" 32 | for j in range(len(conceptList)): 33 | if concept == conceptList[j][1]: 34 | concept_id = conceptList[j][0] 35 | break 36 | if stock_id!="" and concept_id!="": 37 | stock_concept_list.append([stock_id, concept_id, "概念属于", "概念属于"]) -------------------------------------------------------------------------------- /test1.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | from bs4 import BeautifulSoup 4 | 5 | def file_name(file_dir): 6 | for root, dirs, files in os.walk(file_dir): 7 | return files 8 | 9 | csvPath = "myJob1/executive_prep.csv" 10 | 11 | list_job = [] 12 | list_sex = [] 13 | list_age = [] 14 | list_code = [] 15 | list_name = [] 16 | 17 | # 解析html 18 | file_dir = "target" 19 | files = file_name(file_dir) 20 | for i in range(len(files)): 21 | print(i, files[i]) 22 | if ".html" in files[i]: 23 | htmlPath = "target/"+files[i] 24 | htmlfile = open(htmlPath, 'r', encoding="gbk") 25 | htmlpage = htmlfile.read() 26 | 27 | soup = BeautifulSoup(htmlpage, "html.parser") 28 | code = soup.title.string.split(" ")[0].split("(")[1][:-1] 29 | 30 | body_tag = soup.body 31 | 32 | try: 33 | # 获取序号 34 | body_tag1 = body_tag.find("div", class_="m_tab_content", id="ml_001").find("tbody").find_all("th", class_="tc") 35 | for value in body_tag1: 36 | list_code.append(str(code)) 37 | 38 | # 获取姓名 39 | body_tag2 = body_tag.find("div", class_="m_tab_content", id="ml_001").find("tbody").find_all("a", class_="turnto") 40 | for value in body_tag2: 41 | list_name.append(value.string) 42 | 43 | # 获取职务 44 | body_tag3 = body_tag.find("div", class_="m_tab_content", id="ml_001").find("tbody").find_all("td", class_="jobs") 45 | for value in body_tag3: 46 | list_job.append(value.string) 47 | 48 | # 获取性别、年龄 49 | body_tag4 = body_tag.find("div", class_="m_tab_content", id="ml_001").find("tbody").find_all("td", class_="intro") 50 | for value in body_tag4: 51 | if(len(value.string)>0): 52 | if(len(value.string.split(" "))>1): 53 | list_sex.append(value.string.split(" ")[0]) 54 | list_age.append(value.string.split(" ")[1]) 55 | else: 56 | list_sex.append("无") 57 | list_age.append("无") 58 | except: 59 | print(htmlPath+"————error") 60 | 61 | print(len(list_name), len(list_sex), len(list_age), len(list_code), len(list_job)) 62 | 63 | # for i in range(len(list_code)): 64 | # print(list_name[i], list_sex[i], list_age[i], list_code[i], 
list_job[i]) 65 | 66 | # 写入csv文件 67 | dataframe = pd.DataFrame({'高管姓名':list_name, '性别':list_sex, '年龄':list_age, '股票代码':list_code, '职位':list_job}) 68 | columns = ['高管姓名','性别','年龄','股票代码', '职位'] 69 | dataframe.to_csv(csvPath, index=False, columns=columns) -------------------------------------------------------------------------------- /test2.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tushare as ts 3 | 4 | # 获取股票行业信息 5 | df = ts.get_industry_classified() 6 | csvPath = "myJob1/stock_industry_prep.csv" 7 | df.to_csv(csvPath, index=False) 8 | 9 | # 获取股票概念信息 10 | df = ts.get_concept_classified() 11 | csvPath = "myJob1/stock_concept_prep.csv" 12 | df.to_csv(csvPath, index=False) 13 | #print(df) 14 | stockList = [] 15 | data = ts.get_stock_basics() 16 | for indexs in data.index: 17 | name = data.loc[indexs].values[1] 18 | code = str(data.loc[indexs].values[0]).zfill(6) 19 | stockList.append([indexs, name, code]) -------------------------------------------------------------------------------- /test3.py: -------------------------------------------------------------------------------- 1 | import re 2 | import csv 3 | import pandas as pd 4 | import tushare as ts 5 | 6 | # 读取“高管”实体 7 | stockList = [] 8 | data = ts.get_stock_basics() 9 | for indexs in data.index: 10 | stockList.append(indexs) 11 | executiveList = [] 12 | data = pd.read_csv("myJob1/executive_prep.csv") 13 | sign = 0 14 | for indexs in data.index: 15 | name = data.loc[indexs].values[0] 16 | sex = data.loc[indexs].values[1] 17 | age = data.loc[indexs].values[2] 18 | code = str(data.loc[indexs].values[3]).zfill(6) 19 | job = data.loc[indexs].values[4] 20 | if code in stockList: 21 | sign = sign + 1 22 | executiveList.append([100000+sign, name, sex, age, code, job, "高管"]) 23 | with open("myJob1/kg/executive.csv","w") as csvfile: 24 | writer = csv.writer(csvfile) 25 | writer.writerow(["index:ID", "name", "sex", "age", "code", "job", ":LABEL"]) 26 | for i in range(len(executiveList)): 27 | writer.writerows([executiveList[i]]) 28 | print(i, executiveList[i], len(stockList)) 29 | 30 | # 创建“公司”实体 31 | sign = 0 32 | stockList2 = [] 33 | ''' 34 | stockList = [] 35 | data = pd.read_csv("myJob1/stock_concept_prep.csv") 36 | for indexs in data.index: 37 | name = data.loc[indexs].values[1] 38 | code = str(data.loc[indexs].values[0]).zfill(6) 39 | if [name, code] not in stockList: 40 | stockList.append([name, code]) 41 | stockList2.append([sign, name, code]) 42 | sign = sign + 1 43 | ''' 44 | data = ts.get_stock_basics() 45 | for indexs in data.index: 46 | status = "normal" 47 | if "ST" in data.loc[indexs].values[0]: 48 | status = "ST" 49 | sign = sign + 1 50 | stockList2.append([200000+sign, data.loc[indexs].values[0], indexs, status, "企业"]) 51 | with open("myJob1/kg/stock.csv","w") as csvfile: 52 | writer = csv.writer(csvfile) 53 | writer.writerow(["index:ID", "name", "code", "status", ":LABEL"]) 54 | for i in range(len(stockList2)): 55 | writer.writerows([stockList2[i]]) 56 | print(i, stockList2[i]) 57 | 58 | # 创建“概念”实体 59 | conceptList = [] 60 | data = pd.read_csv("myJob1/stock_concept_prep.csv") 61 | for indexs in data.index: 62 | concept = data.loc[indexs].values[2] 63 | if concept not in conceptList: 64 | conceptList.append(concept) 65 | with open("myJob1/kg/concept.csv","w") as csvfile: 66 | writer = csv.writer(csvfile) 67 | writer.writerow(["index:ID", "name", ":LABEL"]) 68 | for i in range(len(conceptList)): 69 | writer.writerows([[300000+i+1, conceptList[i], "概念"]]) 70 | 
print(i, conceptList[i]) 71 | 72 | # 创建“行业”实体 73 | industryList = [] 74 | data = pd.read_csv("myJob1/stock_industry_prep.csv") 75 | for indexs in data.index: 76 | industry = data.loc[indexs].values[2] 77 | if industry not in industryList: 78 | industryList.append(industry) 79 | with open("myJob1/kg/industry.csv","w") as csvfile: 80 | writer = csv.writer(csvfile) 81 | writer.writerow(["index:ID", "name", ":LABEL"]) 82 | for i in range(len(industryList)): 83 | writer.writerows([[400000+i+1, industryList[i], "行业"]]) 84 | print(i, industryList[i]) 85 | 86 | # 创建”高管“和”公司“的关系 87 | executiveList = [] 88 | codeList = [] 89 | data = pd.read_csv("myJob1/kg/executive.csv") 90 | for indexs in data.index: 91 | index = data.loc[indexs].values[0] 92 | name = data.loc[indexs].values[1] 93 | code = str(data.loc[indexs].values[4]).zfill(6) 94 | job = data.loc[indexs].values[5] 95 | if code not in codeList: 96 | codeList.append(code) 97 | executiveList.append([index, name, code, job]) 98 | print("done...") 99 | stockList = [] 100 | data = pd.read_csv("myJob1/kg/stock.csv") 101 | for indexs in data.index: 102 | index = data.loc[indexs].values[0] 103 | name = data.loc[indexs].values[1] 104 | code = str(data.loc[indexs].values[2]).zfill(6) 105 | stockList.append([index, name, code]) 106 | print("done...") 107 | # 双重循环 108 | execute_stock_List = [] 109 | num = 0 110 | for i in range(len(executiveList)): 111 | sign = "" 112 | for j in range(len(stockList)): 113 | if str(executiveList[i][2]) == str(stockList[j][2]): 114 | str1 = re.sub('"','', executiveList[i][3]) 115 | execute_stock_List.append([executiveList[i][0], stockList[j][0], str1, "董事会成员"]) 116 | sign = "1" 117 | break 118 | if len(sign) == 0: 119 | num = num + 1 120 | print(num, str(executiveList[i][0])+" , "+str(executiveList[i][2]), len(stockList), len(executiveList[i][2])) 121 | print("done...") 122 | # 存储去重后股票 123 | with open("myJob1/kg/executive_stock.csv","w") as csvfile: 124 | writer = csv.writer(csvfile) 125 | writer.writerow([":START_ID", ":END_ID", "relation", ":TYPE"]) 126 | for i in range(len(execute_stock_List)): 127 | writer.writerows([execute_stock_List[i]]) 128 | print(i, execute_stock_List[i]) 129 | 130 | # 创建“行业_股票”的关系 131 | stockList = [] 132 | data = pd.read_csv("myJob1/kg/stock.csv") 133 | for indexs in data.index: 134 | id = data.loc[indexs].values[0] 135 | code = str(data.loc[indexs].values[2]).zfill(6) 136 | print(id, code) 137 | stockList.append([id, code]) 138 | industryList = [] 139 | data = pd.read_csv("myJob1/kg/industry.csv") 140 | for indexs in data.index: 141 | id = data.loc[indexs].values[0] 142 | industry = data.loc[indexs].values[1] 143 | print(id, industry) 144 | industryList.append([id, industry]) 145 | stock_industry_list = [] 146 | data = pd.read_csv("myJob1/stock_industry_prep.csv") 147 | for indexs in data.index: 148 | stock = str(data.loc[indexs].values[0]).zfill(6) 149 | industry = data.loc[indexs].values[2] 150 | stock_id = "" 151 | for j in range(len(stockList)): 152 | if stock == stockList[j][1]: 153 | stock_id = stockList[j][0] 154 | break 155 | industry_id = "" 156 | for j in range(len(industryList)): 157 | if industry == industryList[j][1]: 158 | industry_id = industryList[j][0] 159 | break 160 | if stock_id!="" and industry_id!="": 161 | stock_industry_list.append([stock_id, industry_id, "行业属于", "行业属于"]) 162 | # 存储关系 163 | with open("myJob1/kg/stock_industry.csv","w") as csvfile: 164 | writer = csv.writer(csvfile) 165 | writer.writerow([":START_ID", ":END_ID", "relation", ":TYPE"]) 166 | for i in 
range(len(stock_industry_list)): 167 | writer.writerows([stock_industry_list[i]]) 168 | print(i, stock_industry_list[i]) 169 | 170 | # 创建“概念_股票”的关系 171 | stockList = [] 172 | data = pd.read_csv("myJob1/kg/stock.csv") 173 | for indexs in data.index: 174 | id = data.loc[indexs].values[0] 175 | code = str(data.loc[indexs].values[2]).zfill(6) 176 | print(id, code) 177 | stockList.append([id, code]) 178 | conceptList = [] 179 | data = pd.read_csv("myJob1/kg/concept.csv") 180 | for indexs in data.index: 181 | id = data.loc[indexs].values[0] 182 | concept = data.loc[indexs].values[1] 183 | print(id, concept) 184 | conceptList.append([id, concept]) 185 | stock_concept_list = [] 186 | data = pd.read_csv("myJob1/stock_concept_prep.csv") 187 | for indexs in data.index: 188 | stock = str(data.loc[indexs].values[0]).zfill(6) 189 | concept = data.loc[indexs].values[2] 190 | stock_id = "" 191 | for j in range(len(stockList)): 192 | if stock == stockList[j][1]: 193 | stock_id = stockList[j][0] 194 | break 195 | concept_id = "" 196 | for j in range(len(conceptList)): 197 | if concept == conceptList[j][1]: 198 | concept_id = conceptList[j][0] 199 | break 200 | if stock_id!="" and concept_id!="": 201 | stock_concept_list.append([stock_id, concept_id, "概念属于", "概念属于"]) 202 | # 存储关系 203 | with open("myJob1/kg/stock_concept.csv","w") as csvfile: 204 | writer = csv.writer(csvfile) 205 | writer.writerow([":START_ID", ":END_ID", "relation", ":TYPE"]) 206 | for i in range(len(stock_concept_list)): 207 | writer.writerows([stock_concept_list[i]]) 208 | print(i, stock_concept_list[i]) -------------------------------------------------------------------------------- /test4.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | zfc = '"董事长" ' 4 | 5 | k=re.sub('"','',zfc) 6 | 7 | print(k) -------------------------------------------------------------------------------- /translatetxt.py: -------------------------------------------------------------------------------- 1 | f = open('jingyongnovel2.txt') 2 | o = open('medical13.txt', 'a') 3 | sourceInLines = str(f.readlines()) 4 | str2=sourceInLines.replace("\\\'", "'") 5 | o.write(str2) 6 | -------------------------------------------------------------------------------- /translatetxt2.py: -------------------------------------------------------------------------------- 1 | #coding=utf-8 import jieba import jieba.posseg as pseg import time t1=time.time() f=open("t_with_splitter.txt","r") #读取文本 string=f.read().decode("utf-8") words = pseg.cut(string) #进行分词 result="" #记录最终结果的变量 for w in words: result+= str(w.word)+"/"+str(w.flag) #加词性标注 f=open("t_with_POS_tag.txt","w") #将结果保存到另一个文档中 f.write(result) f.close() t2=time.time() print("分词及词性标注完成,耗时:"+str(t2-t1)+"秒。") #反馈结果 2 | -------------------------------------------------------------------------------- /哈工大停用词表.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crystal-tensor/Finance-Knowledge-Graph/6dff59528ffcc93549c2ae364f36eff30e031dc5/哈工大停用词表.txt -------------------------------------------------------------------------------- /投资决策流程及模型部署文件的生成.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crystal-tensor/Finance-Knowledge-Graph/6dff59528ffcc93549c2ae364f36eff30e031dc5/投资决策流程及模型部署文件的生成.pdf -------------------------------------------------------------------------------- /股票择时投资策略2.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/crystal-tensor/Finance-Knowledge-Graph/6dff59528ffcc93549c2ae364f36eff30e031dc5/股票择时投资策略2.pdf --------------------------------------------------------------------------------
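The kg/*.csv files written by test3.py and news_industry.py use Neo4j's bulk-import header conventions (index:ID, :LABEL, :START_ID, :END_ID, :TYPE), which suggests they are intended for the neo4j-admin importer. A minimal sketch of that step is below; it assumes a local, empty Neo4j installation with neo4j-admin on PATH, and the exact flag syntax differs between Neo4j versions.

# Hypothetical import step: bulk-load the generated node and relationship CSVs.
# Assumes an empty local Neo4j database; adjust flags for your Neo4j version.
import subprocess

subprocess.run([
    "neo4j-admin", "import",
    "--nodes=myJob1/kg/executive.csv",
    "--nodes=myJob1/kg/stock.csv",
    "--nodes=myJob1/kg/concept.csv",
    "--nodes=myJob1/kg/industry.csv",
    "--relationships=myJob1/kg/executive_stock.csv",
    "--relationships=myJob1/kg/stock_industry.csv",
    "--relationships=myJob1/kg/stock_concept.csv",
    "--relationships=myJob1/kg/news_industry.csv",
], check=True)

The :TYPE column in the relationship files already carries the relation names written by the scripts (董事会成员, 行业属于, 概念属于), so the importer picks the relationship types up directly from the CSVs.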