├── 7_1_Word2Vec.py ├── NegativeIndustry.py ├── NegativeNews2.py ├── README.md ├── data └── test ├── div_score.py ├── emotion.py ├── function.py ├── graph └── readme ├── medical2last.zip ├── news20200103.csv ├── news20200103_2.csv ├── news_industry.py ├── news_socre.py ├── news_stock.py ├── test1.py ├── test2.py ├── test3.py ├── test4.py ├── translatetxt.py ├── translatetxt2.py ├── 哈工大停用词表.txt ├── 投资决策流程及模型部署文件的生成.pdf └── 股票择时投资策略2.pdf /7_1_Word2Vec.py: -------------------------------------------------------------------------------- 1 | #%% 2 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | import collections 17 | import math 18 | import os 19 | import random 20 | import zipfile 21 | 22 | import numpy as np 23 | import urllib 24 | import tensorflow as tf 25 | 26 | #Step 1: Download the data. 27 | # url = 'http://mattmahoney.net/dc/' 28 | # 29 | # def maybe_download(filename, expected_bytes): 30 | # """Download a file if not present, and make sure it's the right size.""" 31 | # if not os.path.exists(filename): 32 | # filename, _ = urllib.request.urlretrieve(url + filename, filename) 33 | # statinfo = os.stat(filename) 34 | # if statinfo.st_size == expected_bytes: 35 | # print('Found and verified', filename) 36 | # else: 37 | # print(statinfo.st_size) 38 | # raise Exception( 39 | # 'Failed to verify ' + filename + '. Can you get to it with a browser?') 40 | # return filename 41 | 42 | #filename = maybe_download('text8.zip', 31344016) 43 | filename = 'medical2last.zip' 44 | 45 | # Read the data into a list of strings. 46 | def read_data(filename): 47 | """Extract the first file enclosed in a zip file as a list of words""" 48 | 49 | with zipfile.ZipFile(filename) as f: 50 | #f = open(filename, 'r', encoding=u'utf-8', errors='ignore') 51 | data = tf.compat.as_str(f.read(f.namelist()[0])).split() 52 | return data 53 | 54 | words = read_data(filename) 55 | 56 | #print('Data size', len(words)) 57 | 58 | # Step 2: Build the dictionary and replace rare words with UNK token. 59 | vocabulary_size = 5000 60 | 61 | def build_dataset(words): 62 | count = [['UNK', -1]] 63 | count.extend(collections.Counter(words).most_common(vocabulary_size - 1)) 64 | dictionary = dict() 65 | for word, _ in count: 66 | dictionary[word] = len(dictionary) 67 | data = list() 68 | unk_count = 0 69 | for word in words: 70 | if word in dictionary: 71 | index = dictionary[word] 72 | else: 73 | index = 0 # dictionary['UNK'] 74 | unk_count += 1 75 | data.append(index) 76 | count[0][1] = unk_count 77 | reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys())) 78 | return data, count, dictionary, reverse_dictionary 79 | 80 | data, count, dictionary, reverse_dictionary = build_dataset(words) 81 | del words # Hint to reduce memory. 
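# At this point in the script:
#   data               - the corpus encoded as integer word IDs (index 0 is 'UNK')
#   count              - (word, frequency) pairs for the vocabulary_size most frequent words
#   dictionary         - word -> ID lookup
#   reverse_dictionary - ID -> word lookup, used below when printing sample batches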
82 | print('Most common words (+UNK)', count[:5]) 83 | print('Sample data', data[:10], [reverse_dictionary[i] for i in data[:10]]) 84 | 85 | data_index = 0 86 | 87 | 88 | # Step 3: Function to generate a training batch for the skip-gram model. 89 | def generate_batch(batch_size, num_skips, skip_window): 90 | global data_index 91 | assert batch_size % num_skips == 0 92 | assert num_skips <= 2 * skip_window 93 | batch = np.ndarray(shape=(batch_size), dtype=np.int32) 94 | labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32) 95 | span = 2 * skip_window + 1 # [ skip_window target skip_window ] 96 | buffer = collections.deque(maxlen=span) 97 | for _ in range(span): 98 | buffer.append(data[data_index]) 99 | data_index = (data_index + 1) % len(data) 100 | for i in range(batch_size // num_skips): 101 | target = skip_window # target label at the center of the buffer 102 | targets_to_avoid = [ skip_window ] 103 | for j in range(num_skips): 104 | while target in targets_to_avoid: 105 | target = random.randint(0, span - 1) 106 | targets_to_avoid.append(target) 107 | batch[i * num_skips + j] = buffer[skip_window] 108 | labels[i * num_skips + j, 0] = buffer[target] 109 | buffer.append(data[data_index]) 110 | data_index = (data_index + 1) % len(data) 111 | return batch, labels 112 | 113 | batch, labels = generate_batch(batch_size=8, num_skips=2, skip_window=1) 114 | for i in range(8): 115 | print(batch[i], reverse_dictionary[batch[i]], 116 | '->', labels[i, 0], reverse_dictionary[labels[i, 0]]) 117 | 118 | # Step 4: Build and train a skip-gram model. 119 | 120 | batch_size = 128 121 | embedding_size = 128 # Dimension of the embedding vector. 122 | skip_window = 1 # How many words to consider left and right. 123 | num_skips = 2 # How many times to reuse an input to generate a label. 124 | 125 | # We pick a random validation set to sample nearest neighbors. Here we limit the 126 | # validation samples to the words that have a low numeric ID, which by 127 | # construction are also the most frequent. 128 | valid_size = 16 # Random set of words to evaluate similarity on. 129 | valid_window = 100 # Only pick dev samples in the head of the distribution. 130 | valid_examples = np.random.choice(valid_window, valid_size, replace=False) 131 | num_sampled = 64 # Number of negative examples to sample. 132 | 133 | 134 | graph = tf.Graph() 135 | with graph.as_default(): 136 | 137 | # Input data. 138 | train_inputs = tf.placeholder(tf.int32, shape=[batch_size]) 139 | train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1]) 140 | valid_dataset = tf.constant(valid_examples, dtype=tf.int32) 141 | 142 | # Ops and variables pinned to the CPU because of missing GPU implementation 143 | with tf.device('/cpu:0'): 144 | # Look up embeddings for inputs. 145 | embeddings = tf.Variable( 146 | tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0)) 147 | embed = tf.nn.embedding_lookup(embeddings, train_inputs) 148 | 149 | # Construct the variables for the NCE loss 150 | nce_weights = tf.Variable( 151 | tf.truncated_normal([vocabulary_size, embedding_size], 152 | stddev=1.0 / math.sqrt(embedding_size))) 153 | nce_biases = tf.Variable(tf.zeros([vocabulary_size])) 154 | 155 | # Compute the average NCE loss for the batch. 156 | # tf.nce_loss automatically draws a new sample of the negative labels each 157 | # time we evaluate the loss. 
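# In effect, alongside each batch of (centre word, context word) pairs the loss
# draws num_sampled random words from the vocabulary as negatives and trains the
# model to distinguish the true context words from that noise.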
158 | loss = tf.reduce_mean( 159 | tf.nn.nce_loss(weights=nce_weights, 160 | biases=nce_biases, 161 | labels=train_labels, 162 | inputs=embed, 163 | num_sampled=num_sampled, 164 | num_classes=vocabulary_size)) 165 | 166 | # Construct the SGD optimizer using a learning rate of 1.0. 167 | optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(loss) 168 | 169 | # Compute the cosine similarity between minibatch examples and all embeddings. 170 | norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True)) 171 | normalized_embeddings = embeddings / norm 172 | valid_embeddings = tf.nn.embedding_lookup( 173 | normalized_embeddings, valid_dataset) 174 | similarity = tf.matmul( 175 | valid_embeddings, normalized_embeddings, transpose_b=True) 176 | 177 | # Add variable initializer. 178 | init = tf.global_variables_initializer() 179 | 180 | # Step 5: Begin training. 181 | num_steps = 10001 182 | 183 | with tf.Session(graph=graph) as session: 184 | # We must initialize all variables before we use them. 185 | init.run() 186 | print("Initialized") 187 | 188 | average_loss = 0 189 | for step in range(num_steps): 190 | batch_inputs, batch_labels = generate_batch( 191 | batch_size, num_skips, skip_window) 192 | feed_dict = {train_inputs : batch_inputs, train_labels : batch_labels} 193 | 194 | # We perform one update step by evaluating the optimizer op (including it 195 | # in the list of returned values for session.run() 196 | _, loss_val = session.run([optimizer, loss], feed_dict=feed_dict) 197 | average_loss += loss_val 198 | 199 | if step % 2000 == 0: 200 | if step > 0: 201 | average_loss /= 2000 202 | # The average loss is an estimate of the loss over the last 2000 batches. 203 | print("Average loss at step ", step, ": ", average_loss) 204 | average_loss = 0 205 | 206 | # Note that this is expensive (~20% slowdown if computed every 500 steps) 207 | if step % 10000 == 0: 208 | sim = similarity.eval() 209 | for i in range(valid_size): 210 | valid_word = reverse_dictionary[valid_examples[i]] 211 | top_k = 8 # number of nearest neighbors 212 | nearest = (-sim[i, :]).argsort()[1:top_k+1] 213 | log_str = "Nearest to %s:" % valid_word 214 | for k in range(top_k): 215 | close_word = reverse_dictionary[nearest[k]] 216 | log_str = "%s %s," % (log_str, close_word) 217 | print(log_str) 218 | final_embeddings = normalized_embeddings.eval() 219 | 220 | # Step 6: Visualize the embeddings. 
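# final_embeddings holds the L2-normalised word vectors learned above; the code
# below projects the first plot_only (= 200, i.e. most frequent) of them into
# 2-D with t-SNE and saves a scatter plot to tsne.png.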
221 | 222 | def plot_with_labels(low_dim_embs, labels, filename='tsne.png'): 223 | assert low_dim_embs.shape[0] >= len(labels), "More labels than embeddings" 224 | plt.figure(figsize=(18, 18)) #in inches 225 | for i, label in enumerate(labels): 226 | x, y = low_dim_embs[i,:] 227 | plt.scatter(x, y) 228 | plt.annotate(label, 229 | xy=(x, y), 230 | xytext=(5, 2), 231 | textcoords='offset points', 232 | ha='right', 233 | va='bottom') 234 | 235 | plt.savefig(filename) 236 | 237 | #%% 238 | try: 239 | from sklearn.manifold import TSNE 240 | import matplotlib.pyplot as plt 241 | 242 | tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000) 243 | plot_only = 200 244 | low_dim_embs = tsne.fit_transform(final_embeddings[:plot_only,:]) 245 | labels = [reverse_dictionary[i] for i in range(plot_only)] 246 | plot_with_labels(low_dim_embs, labels) 247 | 248 | except ImportError: 249 | print("Please install sklearn, matplotlib, and scipy to visualize embeddings.") 250 | -------------------------------------------------------------------------------- /NegativeIndustry.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import math 4 | import sys 5 | import openpyxl 6 | 7 | from openpyxl import Workbook 8 | import win32com.client 9 | from win32com.client import Dispatch 10 | 11 | 12 | # wb = Workbook('data/股票负面^^^20191227.xlsx') 13 | # negativenews = wb.worksheets[0] 14 | negative = openpyxl.load_workbook('data/newdata_stock20200115_standard.xlsx') 15 | sheetsne = negative.sheetnames 16 | negativeinds = negative[sheetsne[4]] 17 | 18 | 19 | score = openpyxl.load_workbook('data/股票所属行业.xlsx') 20 | sheetssc = score.sheetnames 21 | sc = score[sheetssc[0]] 22 | 23 | File = open("data/negativeindustry20200115.txt", "w", encoding=u'utf-8', errors='ignore') 24 | File.write("股票名称"+"," + "所属行业" +"\n") 25 | 26 | i = 2 27 | j = 2 28 | for a in range(200): 29 | ne = negativeinds.cell(row=i, column=4).value 30 | for b in range(3635): 31 | ser = sc.cell(row=j, column=3).value 32 | j = j+1 33 | k = 1 34 | if ser==ne: 35 | File.write(str(sc.cell(row=j, column=2).value) + ","+ str(sc.cell(row=j, column=3).value) + ","+"\n") 36 | j = 2 37 | i = i + 1 38 | print("ok") 39 | 40 | 41 | -------------------------------------------------------------------------------- /NegativeNews2.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import math 4 | import sys 5 | import openpyxl 6 | 7 | from openpyxl import Workbook 8 | import win32com.client 9 | from win32com.client import Dispatch 10 | 11 | 12 | # wb = Workbook('data/股票负面^^^20191227.xlsx') 13 | # negativenews = wb.worksheets[0] 14 | #negative = openpyxl.load_workbook('data/negativenews20200106.xlsx') 15 | score = openpyxl.load_workbook('data/newdata_stock20200115_standard.xlsx') 16 | sheetssc = score.sheetnames 17 | sc = score[sheetssc[1]] 18 | 19 | sheetsne = score.sheetnames 20 | negativenews = score[sheetsne[3]] 21 | 22 | File = open("data/negativenews20200115.txt", "w", encoding=u'utf-8', errors='ignore') 23 | File.write("标题"+"," + "风险类别" +","+ "重要性" +","+ "证券代码" +","+ "证券简称" +","+ "公司全称"+","+ "来源" +","+ "时间"+"\n") 24 | 25 | i = 2 26 | j = 2 27 | for a in range(396): 28 | ne = negativenews.cell(row=i, column=6).value 29 | for b in range(200): 30 | ser = sc.cell(row=j, column=3).value 31 | j = j+1 32 | k = 1 33 | if ser==ne: 34 | File.write(str(negativenews.cell(row=i, column=2).value) + ","+ \ 35 | 
str(negativenews.cell(row=i, column=3).value) + "," + str(negativenews.cell(row=i, column=4).value)+ ","+ \ 36 | str(negativenews.cell(row=i, column=5).value) + "," + str(negativenews.cell(row=i, column=6).value)+ ","+ \ 37 | str(negativenews.cell(row=i, column=7).value) + "," + str(negativenews.cell(row=i, column=8).value) + "," + \ 38 | str(negativenews.cell(row=i, column=9).value) + "," + "\n") 39 | j = 2 40 | i = i + 1 41 | print("ok") 42 | 43 | 44 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Knowledge-Graph-实现步骤 2 | 1:投资策略构建步骤及思路见文件:股票择时投资策略2.pdf 3 | 2:选股模型及模型部署见:投资决策流程及模型部署文件的生成.pdf 4 | 3: 5 | 6 | 7 | 功能: 8 | 自动分析非结构化的实体,抽取三元组关系,并自动构建图谱。同时在关系的抽取上增加了关系的权重,使得构建的关系具备强弱关系,不仅能做知识推理还能做因果推理和逻辑推理, 9 | -------------------------------------------------------------------------------- /data/test: -------------------------------------------------------------------------------- 1 | test 2 | -------------------------------------------------------------------------------- /div_score.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import math 4 | import sys 5 | sys.path.append('E:/crystal-forcase/alpha-stock') 6 | import function 7 | big = float("inf") 8 | 9 | 10 | 11 | #读txt文件的代码 12 | csv_file = 'data/20200115_utf8.txt' 13 | f = open(csv_file, 'r', encoding=u'utf-8', errors='ignore') 14 | df = pd.read_csv(f, sep='\t') 15 | # df.dropna(inplace=True) 16 | # 读csv的代码 17 | # csv_file = 'data/newdata_stock20191203-13_standard.csv' 18 | # f = open(csv_file, 'r', encoding=u'utf-8', errors='ignore') 19 | # df = pd.read_csv(f) 20 | #print(df) 21 | 22 | 23 | #File = open("data/score.txt", "w") 24 | File = open("data/score20200115.txt", "w", encoding=u'utf-8', errors='ignore') 25 | File.write("id"+"," + "code" +","+ "name" +","+ "score" +"\n") 26 | 27 | for i in range(3659): 28 | incomev = df.iloc[i:i + 1, 8:9].values 29 | incomegrowthratev = df.iloc[i:i + 1, 20:21].values 30 | revenuesv = df.iloc[i:i + 1, 6:7].values 31 | revenuesgrowthratev = df.iloc[i:i + 1, 21:22].values 32 | ROEv = df.iloc[i:i + 1, 11:12].values 33 | pb=df.iloc[i:i+1, 5:6].values 34 | pe=df.iloc[i:i+1, 10:11].values 35 | Investmentincomev = df.iloc[i:i + 1, 19:20].values 36 | Persharereservev = df.iloc[i:i + 1, 13:14].values 37 | Netassetspersharev = df.iloc[i:i + 1, 7:8].values 38 | EPSv = df.iloc[i:i + 1, 4:5].values 39 | Operatingcashpersharev = df.iloc[i:i + 1, 11:12].values 40 | Currentliabilityv = df.iloc[i:i + 1, 14:15].values 41 | cashflowv = df.iloc[i:i + 1, 18:19].values 42 | stockholderequityv = df.iloc[i:i + 1, 16:17].values 43 | Shareholdersequityratiov = df.iloc[i:i + 1, 22:23].values 44 | operatingprofitv = df.iloc[i:i + 1, 9:10].values 45 | capitalsurplusv = df.iloc[i:i + 1, 17:18].values 46 | grossprofitratiov = df.iloc[i:i + 1, 23:24].values 47 | longtermdebtv = df.iloc[i:i + 1, 15:16].values 48 | volumeoftransactionv = df.iloc[i:i + 1, 27:28].values 49 | amountoftransactionv = df.iloc[i:i + 1, 28:29].values 50 | stagerangev = df.iloc[i:i + 1, 25:26].values 51 | KDJ_v = df.iloc[i:i + 1, 60:61].values 52 | RSI_v = df.iloc[i:i + 1, 59:60].values 53 | VAR33_v = df.iloc[i:i + 1, 30:31].values 54 | VAR35_v = df.iloc[i:i + 1, 32:33].values 55 | VAR36_v = df.iloc[i:i + 1, 33:34].values 56 | VAR37_v = df.iloc[i:i + 1, 34:35].values 57 | VAR38_v=df.iloc[i:i+1, 35:36].values 58 | VAR39_v=df.iloc[i:i+1, 
36:37].values 59 | VAR40_v=df.iloc[i:i+1, 37:38].values 60 | VAR41_v=df.iloc[i:i+1, 38:39].values 61 | VAR43_v=df.iloc[i:i+1, 40:41].values 62 | VAR45_v=df.iloc[i:i+1, 42:43].values 63 | VAR46_v=df.iloc[i:i+1, 43:44].values 64 | VAR48_v=df.iloc[i:i+1, 45:46].values 65 | VAR49_v = df.iloc[i:i + 1, 46:47].values 66 | VAR50_v = df.iloc[i:i + 1, 47:48].values 67 | VAR51_v = df.iloc[i:i + 1, 48:49].values 68 | VAR53_v = df.iloc[i:i + 1, 50:51].values 69 | VAR55_v = df.iloc[i:i + 1, 52:53].values 70 | VAR56_v = df.iloc[i:i + 1, 53:54].values 71 | VAR58_v = df.iloc[i:i + 1, 55:56].values 72 | VAR60_v = df.iloc[i:i + 1, 57:58].values 73 | VAR61_v = df.iloc[i:i + 1, 58:59].values 74 | 75 | print(i) 76 | #score=function.PB_1(pb)+function.PE_1(pe) 77 | score = function.income1(incomev) +function.incomegrowthrate1(incomegrowthratev)+function.revenues1(revenuesv) + \ 78 | function.revenuesgrowthrate1(revenuesgrowthratev)+\ 79 | function.ROE1(ROEv) + function.Investmentincome1(Investmentincomev) + function.Persharereserve1(Persharereservev) + \ 80 | function.Netassetspershare1(Netassetspersharev)+\ 81 | function.EPS1(EPSv) + function.Operatingcashpershare1(Operatingcashpersharev) + \ 82 | function.Currentliability1(Currentliabilityv) + function.cashflow1(cashflowv)+\ 83 | function.stockholderequity1(stockholderequityv) + function.Shareholdersequityratio1(Shareholdersequityratiov) + \ 84 | function.operatingprofit1(operatingprofitv) + function.capitalsurplus1(capitalsurplusv)+\ 85 | function.grossprofitratio1(grossprofitratiov) + function.longtermdebt1(longtermdebtv) + \ 86 | function.volumeoftransaction1(volumeoftransactionv) + function.amountoftransaction1(amountoftransactionv)+\ 87 | function.stagerange1(stagerangev) + function.KDJ_2(KDJ_v) + \ 88 | function.PB_1(pb)+function.PE_1(pe)+ function.RSI(RSI_v) + \ 89 | function.VAR33_1(VAR33_v)+\ 90 | function.VAR35_1(VAR35_v) + function.VAR36_1(VAR36_v) + function.VAR37_1(VAR37_v) + function.VAR38_1(VAR38_v)+\ 91 | function.VAR39_1(VAR39_v) + function.VAR40_1(VAR40_v) + function.VAR43_1(VAR43_v) + function.VAR45_1(VAR45_v)+\ 92 | function.VAR46_1(VAR46_v) + function.VAR48_1(VAR48_v) + function.VAR49_1(VAR49_v) + function.VAR50_1(VAR50_v)+\ 93 | function.VAR51_1(VAR51_v) + function.VAR53_1(VAR53_v) + function.VAR55_1(VAR55_v) + function.VAR56_1(VAR56_v)+\ 94 | function.VAR58_1(VAR58_v) + function.VAR60_1(VAR60_v) + function.VAR61_1(VAR61_v) 95 | 96 | #File.write(str(df.iloc[i:i+1, 0:1].values)+"," + str(df.iloc[i:i+1, 1:2].values)+"," + "\n") 97 | File.write(str(i)+"," + str(df.iloc[i:i+1, 0:1].values)+"," + str(df.iloc[i:i+1, 1:2].values) +"," + str(score) +"," + "\n") 98 | 99 | File.close() 100 | print("ok!!!") -------------------------------------------------------------------------------- /emotion.py: -------------------------------------------------------------------------------- 1 | 2 | from snownlp import SnowNLP 3 | 4 | 5 | text2 = '2019年拉美地区经济增速放缓' 6 | text1 = '大数据“杀熟”? 
鲜花包月质量忽上忽下的原因终于找到了,减薪裁员、旅客锐减……港媒:香港为春节经济忧愁' 7 | 8 | s1 = SnowNLP(text1) 9 | s2 = SnowNLP(text2) 10 | 11 | print(s1.sentiments,s2.sentiments) -------------------------------------------------------------------------------- /function.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | big = float("inf") 5 | 6 | def KDJ_2(KDJ_2): 7 | if not KDJ_2 is None: gb = 0.0001 8 | if KDJ_2<43.028: gb= 0.020408 9 | if KDJ_2>= 43.028 and KDJ_2 < 63.078: gb= 6.90263 10 | if KDJ_2>= 63.078 and KDJ_2 < 67.891: gb= 14.43635 11 | if KDJ_2>= 67.891 and KDJ_2 < 71.247: gb= 11.56308 12 | if KDJ_2>= 71.247 and KDJ_2 < 77.165: gb= 23.43814 13 | if KDJ_2>= 77.165 and KDJ_2 < 82.906: gb= 22.65665 14 | if KDJ_2>= 82.906 and KDJ_2 < 88.605: gb= 19.58754 15 | if KDJ_2 >= 88.605 and KDJ_2 < big: gb = 21.320709 16 | return gb 17 | 18 | 19 | def RSI(RSI): 20 | if not RSI is None: gb = 0.0001 21 | if RSI < 1.384: gb= 6.23399 22 | if RSI>= 1.384 and RSI < 30.405: gb= 7.864 23 | if RSI>= 30.405 and RSI < 99.759: gb= 11.9817 24 | if RSI>= 99.759 and RSI < big: gb= 0.020408 25 | return gb 26 | 27 | def VAR33_1(VAR33): 28 | if not VAR33 is None: gb = 0.0001 29 | if VAR33 < 0: gb= 3.84736 30 | if VAR33 >= 0 and VAR33 < 1: gb= 0.020408 31 | if VAR33 >= 1: gb = 0.0190408 32 | return gb 33 | 34 | 35 | def VAR35_1(VAR35): 36 | if not VAR35 is None: gb = 0.0001 37 | if VAR35 < 77319.0: gb= 24.5021 38 | if VAR35>= 77319.0 and VAR35 < 106120.0: gb= 24.13250 39 | if VAR35>= 106120.0 and VAR35 < 163609: gb= 14.6890 40 | if VAR35>= 163609 and VAR35 < 219444: gb= 17.6607 41 | if VAR35>= 219444 and VAR35 < 329607.0: gb= 4.4077 42 | if VAR35>= 329607.0 and VAR35 < 1491059: gb= 4.67171 43 | if VAR35>= 1491059 and VAR35 < 2147483647: gb= 0.02040 44 | if VAR35>= 2147483647 and VAR35 < big: gb= 0.019 45 | return gb 46 | 47 | 48 | def VAR36_1(VAR36): 49 | if not VAR36 is None: gb = 0.0001 50 | if VAR36 < 8779.51 : gb= 4.2437 51 | if VAR36>= 8779.51 and VAR36 < 12446.86 : gb= 2.7942 52 | if VAR36>= 12446.86 and VAR36 < 16709.87 : gb= 7.9176 53 | if VAR36>= 16709.87 and VAR36 < 22351.1 : gb= 11.7639 54 | if VAR36>= 22351.1 and VAR36 < 29362.59 : gb= 5.9812 55 | if VAR36>= 29362.59 and VAR36 < 43957.96 : gb= 1.15117 56 | if VAR36>= 43957.96 and VAR36 < 68756.27: gb= 4.7812 57 | if VAR36>= 68756.27 and VAR36 < 106871.38 : gb= 4.8037 58 | if VAR36>= 106871.38 and VAR36 < big : gb= 0.0204 59 | return gb 60 | 61 | 62 | def VAR37_1(VAR37): 63 | if not VAR37 is None: gb = 0.020408 64 | if VAR37 < big: gb= 2.11007 65 | return gb 66 | 67 | 68 | def VAR38_1(VAR38): 69 | if not VAR38 is None: gb = 1.70545 70 | if VAR38 < 0 : gb= 5.92976 71 | if VAR38>= 0 and VAR38 < big: gb= 0.020408 72 | return gb 73 | 74 | def VAR39_1(VAR39): 75 | if not VAR39 is None: gb = 0.020408 76 | if VAR39 < 1 : gb= 2.11007 77 | if VAR39 >= 1 : gb= 0.0204 78 | return gb 79 | 80 | 81 | def VAR40_1(VAR40): 82 | if not VAR40 is None : gb= 20.6475 83 | if VAR40>= 0 and VAR40 < 81311 : gb= 0.02040 84 | if VAR40>= 81311 and VAR40 < 160720 : gb=12.9211 85 | if VAR40>= 160720 and VAR40 < 257176 : gb=17.0822 86 | if VAR40>= 257176 and VAR40 < 333971 : gb=27.4035 87 | if VAR40>= 333971 and VAR40 < 499427 : gb=20.2264 88 | if VAR40>= 499427 and VAR40 < 716185 : gb=30.3518 89 | if VAR40>= 716185 and VAR40 < 1039738: gb=41.2039 90 | if VAR40>= 1039738 and VAR40 < big : gb=34.7442 91 | return gb 92 | 93 | 94 | def VAR41_1(VAR41): 95 | if not VAR41 is None: gb=20.0317 96 | if VAR41>= 0 and VAR41 < 11271.18 : 
gb=31.0293 97 | if VAR41>= 11271.18 and VAR41 < 17319.41 : gb=31.4851 98 | if VAR41>= 17319.41 and VAR41 < 22315.25 : gb=28.59064 99 | if VAR41>= 22315.25 and VAR41 < 28671.46 : gb=27.63547 100 | if VAR41>= 28671.46 and VAR41 < 36942.3 : gb=30.9357 101 | if VAR41>= 36942.3 and VAR41 < 50587.07 : gb=18.88754 102 | if VAR41>= 50587.07 and VAR41 < 71743.51 : gb=23.1686 103 | if VAR41>= 71743.51 and VAR41 < 171064 : gb=11.171407 104 | if VAR41>= 171064 and VAR41 < big : gb=0.020408 105 | return gb 106 | 107 | 108 | def VAR43_1(VAR43): 109 | if not VAR43 is None: gb = 0.0001 110 | if VAR43<0 : gb=6.14222 111 | if VAR43>= 0 and VAR43 <1: gb=0.020408 112 | if VAR43 >= 1: gb = 0.01925 113 | return gb 114 | 115 | 116 | def VAR45_1(VAR45): 117 | if not VAR45 is None: gb = 0.0001 118 | if VAR45>= 0 and VAR45 < 72020 : gb=13.3649 119 | if VAR45>= 72020 and VAR45 < 131110 : gb=4.43968 120 | if VAR45>= 131110 and VAR45 < 205731: gb=14.18636 121 | if VAR45>= 205731 and VAR45 < 294651.0 : gb=11.38658 122 | if VAR45>= 294651 and VAR45 < 452797 : gb=17.25659 123 | if VAR45>= 452797 and VAR45 < 763087 : gb=18.0357 124 | if VAR45>= 763087 and VAR45 < big : gb=0.02040 125 | return gb 126 | 127 | 128 | def VAR46_1(VAR46): 129 | if not VAR46 is None: gb = 0.0001 130 | if VAR46 < 7500.41: gb=11.21801 131 | if VAR46>= 7500.41 and VAR46 < 12116.53: gb=13.16524 132 | if VAR46>= 12116.53 and VAR46 < 20201.43: gb=9.23118 133 | if VAR46>= 20201.43 and VAR46 < 23772.09: gb=0.06967 134 | if VAR46>= 23772.09 and VAR46 < 29947.4 : gb=7.9699 135 | if VAR46>= 29947.4 and VAR46 < 37733.76 : gb=3.56138 136 | if VAR46>= 37733.76 and VAR46 < 47715.75: gb=6.6857 137 | if VAR46>= 47715.75 and VAR46 < 66729.84 : gb=5.4682 138 | if VAR46>= 66729.84 and VAR46 < 97586.22 : gb=0.020408 139 | if VAR46>= 97586.22 and VAR46 < 155363.81: gb=13.0273 140 | if VAR46 >= 155363.81 and VAR46 < big: gb = 9.8571 141 | return gb 142 | 143 | def VAR48_1(VAR48): 144 | if not VAR48 is None: gb= 0.020408 145 | if VAR48<0: gb= 1.10442 146 | if VAR48>=0 and VAR48 <1: gb= 0.17589 147 | if VAR48 >= 1: gb = 0.019589 148 | return gb 149 | 150 | 151 | def VAR49_1(VAR49): 152 | if not VAR49 is None: gb= 0.02040 153 | if VAR49 < 1 : gb= 0.8647 154 | if VAR49 >= 1: gb = 0.019 155 | return gb 156 | 157 | 158 | def VAR50_1(VAR50): 159 | if not VAR50 is None: gb= 10.2411 160 | if VAR50>= 0 and VAR50 < 88923 : gb= 3.77103 161 | if VAR50>= 88923 and VAR50 < 130928 : gb= 2.58228 162 | if VAR50>= 130928 and VAR50 < 184901 : gb= 13.17023 163 | if VAR50>= 184901 and VAR50 < 240270 : gb= 3.9062 164 | if VAR50>= 240270 and VAR50 < 324552 : gb= 7.9744 165 | if VAR50>= 324552 and VAR50 < 501716 : gb= 0.79015 166 | if VAR50>= 501716 and VAR50 < 710920 : gb= 0.020408 167 | if VAR50>= 710920 and VAR50 < 1136161 : gb= 1.167402 168 | if VAR50>= 1136161 and VAR50 < big: gb= 2.73416 169 | return gb 170 | 171 | 172 | def VAR51_1(VAR51): 173 | if not VAR51 is None: gb= 6.22334 174 | if VAR51>= 0 and VAR51 < 7423.08 : gb= 22.86327 175 | if VAR51>= 7423.08 and VAR51 < 11214.3 : gb= 15.30725 176 | if VAR51>= 11214.3 and VAR51 < 19171.22 : gb= 17.136336 177 | if VAR51>= 19171.22 and VAR51 < 25500.7 : gb= 7.82033 178 | if VAR51>= 25500.7 and VAR51 < 45908.92 : gb= 3.01746 179 | if VAR51>= 45908.92 and VAR51 < 76280.34 : gb= 4.3366 180 | if VAR51>= 76280.34 and VAR51 < 102100.6 : gb= 3.90378 181 | if VAR51>= 102100.6 and VAR51 < 165718.17 : gb=5.13207 182 | if VAR51>= 165718.17 and VAR51 < big : gb= 0.020408 183 | return gb 184 | 185 | def VAR53_1(VAR53): 186 | if not VAR53 is None: gb = 
0.0001 187 | if VAR53<0 : gb= 0.54369 188 | if VAR53>= 0 and VAR53 < 1: gb= 0.020408 189 | if VAR53 >= 1 : gb = 0.019408 190 | return gb 191 | 192 | 193 | def VAR55_1(VAR55): 194 | if not VAR55 is None: gb = 0.0001 195 | if VAR55>= 0 and VAR55 < 64980 : gb=1.3912 196 | if VAR55>= 64980 and VAR55 < 106045 : gb=10.97048 197 | if VAR55>= 106045 and VAR55 < 134082 : gb=1.147016 198 | if VAR55>= 134082 and VAR55 < 166444 : gb=4.27181 199 | if VAR55>= 166444 and VAR55 < 199383 : gb=0.020408 200 | if VAR55>= 199383 and VAR55 < 245264 : gb=0.54963 201 | if VAR55>= 245264 and VAR55 < 335063 : gb=13.57916 202 | if VAR55>= 335063 and VAR55 < 473830 : gb=22.27843 203 | if VAR55>= 473830 and VAR55 < 905185 : gb=20.370161 204 | if VAR55>= 905185 and VAR55 < 1583630 : gb=24.81614 205 | if VAR55>= 1583630 and VAR55 < big : gb=35.176292 206 | return gb 207 | 208 | 209 | def VAR56_1(VAR56): 210 | if not VAR56 is None: gb = 0.0001 211 | if VAR56 < 7222.94 : gb=33.01424 212 | if VAR56>= 7222.94 and VAR56 < 13759.4 : gb=36.35627 213 | if VAR56>= 13759.4 and VAR56 < 18704.85 : gb=20.1377 214 | if VAR56>= 18704.85 and VAR56 < 25932.25 : gb=28.41691 215 | if VAR56>= 25932.25 and VAR56 < 43917.25 : gb=18.19527 216 | if VAR56>= 43917.25 and VAR56 < 67007.26 : gb=11.99811 217 | if VAR56>= 67007.26 and VAR56 < 118839.54 : gb=6.55267 218 | if VAR56>= 118839.54 and VAR56 < big : gb=0.020408 219 | return gb 220 | 221 | 222 | def VAR58_1(VAR58): 223 | if not VAR58 is None: gb = 0.0001 224 | if VAR58<0: gb=0.020408 225 | if VAR58>= 0 and VAR58 < 1 : gb=2.824744 226 | if VAR58 >= 1: gb =0.01955 227 | return gb 228 | 229 | def VAR60_1(VAR60): 230 | if not VAR60 is None: gb = 0.0001 231 | if VAR60 < 9265.39 : gb=3.93215 232 | if VAR60>= 9265.39 and VAR60 < 14198 : gb=0.020408 233 | if VAR60>= 14198 and VAR60 < 18082 : gb=6.07631 234 | if VAR60>= 18082 and VAR60 < 31583 : gb=7.05776 235 | if VAR60>= 31583 and VAR60 < 46625 : gb=9.17736 236 | if VAR60>= 46625 and VAR60 < 66075 : gb=1.72721 237 | if VAR60>= 66075 and VAR60 < 95239 : gb=4.64637 238 | if VAR60>= 95239 and VAR60 < 126157 : gb=0.79226 239 | if VAR60>= 126157 and VAR60 < 252287 : gb=10.9973 240 | if VAR60>= 252287 and VAR60 < big: gb=7.46181 241 | return gb 242 | 243 | 244 | def VAR61_1(VAR61): 245 | if not VAR61 is None: gb = 0.0001 246 | if VAR61 < 1143.44 : gb=6.94237 247 | if VAR61>= 1143.44 and VAR61 < 1718.85 : gb=0.020408 248 | if VAR61>= 1718.85 and VAR61 < 2278.54 : gb=6.5346 249 | if VAR61>= 2278.54 and VAR61 < 2910.02 : gb=5.90171 250 | if VAR61>= 2910.02 and VAR61 < 4531.11 : gb=14.12564 251 | if VAR61>= 4531.11 and VAR61 < 9736.39 : gb=7.47721 252 | if VAR61>= 9736.39 and VAR61 < big : gb=8.79076 253 | return gb 254 | 255 | #主营收入 256 | def income1(income): 257 | if not income is None: gb = 0.0001 258 | if income < 50284: gb = 2.557202 259 | if income >= 50284 and income < 67710: gb = 1.54461 260 | if income >= 67710 and income < 84880: gb = 8.5168 261 | if income >= 84880 and income < 114529: gb = 0.020408 262 | if income >= 114529 and income < 148444: gb = 8.80206 263 | if income >= 148444 and income < 266865: gb = 7.00573 264 | if income >= 266865 and income < 647837: gb = 9.94524 265 | if income >= 647837 and income < 1204372: gb = 0.14144 266 | if income >= 1204372: gb = 6.49023 267 | return gb 268 | 269 | #主营收入同比 270 | def incomegrowthrate1(incomegrowthrate): 271 | if not incomegrowthrate is None: gb = 0.0001 272 | if incomegrowthrate < 3.248 : gb=4.071247 273 | if incomegrowthrate>= 3.248 and incomegrowthrate < 11.568 : gb=0.9817 274 | if 
incomegrowthrate>= 11.568 and incomegrowthrate < 17.018 : gb=6.95596 275 | if incomegrowthrate>= 17.018 and incomegrowthrate < 22.093 : gb=0.020408 276 | if incomegrowthrate>= 22.093 and incomegrowthrate < 41.593 : gb=2.27162 277 | if incomegrowthrate>= 41.593 and incomegrowthrate < big : gb=7.62120 278 | return gb 279 | 280 | #净利润 281 | def revenues1(revenues): 282 | if not revenues is None: gb = 0.0001 283 | if revenues < 411 : gb=26.83916 284 | if revenues>= 411 and revenues < 3079 : gb=7.50711 285 | if revenues>= 3079 and revenues < 5152 : gb=0.020408 286 | if revenues>= 5152 and revenues < 7530 : gb=8.39818 287 | if revenues>= 7530 and revenues < 10842 : gb=10.3772 288 | if revenues>= 10842 and revenues < 21707 : gb=1.14736 289 | if revenues>= 1.14736 and revenues < 31428 : gb=7.76754 290 | if revenues>= 31428 and revenues < 53050 : gb=1.69234 291 | if revenues>= 53050 and revenues < 100060 : gb=4.03374 292 | if revenues>= 100060 and revenues < big: gb=12.01157 293 | return gb 294 | 295 | #净利润同比 296 | def revenuesgrowthrate1(revenuesgrowthrate): 297 | if not revenuesgrowthrate is None: gb = 0.0001 298 | if revenuesgrowthrate<0: gb=11.43988 299 | if revenuesgrowthrate>= 0 and revenuesgrowthrate < 7.409: gb=11.62207 300 | if revenuesgrowthrate>= 7.409 and revenuesgrowthrate < 18.649: gb=0.020408 301 | if revenuesgrowthrate>= 18.649 and revenuesgrowthrate < 39.96: gb=8.35751 302 | if revenuesgrowthrate>= 39.96 and revenuesgrowthrate < 170.955: gb=10.91314 303 | if revenuesgrowthrate>= 170.955 and revenuesgrowthrate < big: gb=8.19601 304 | return gb 305 | 306 | #净资产收益率 307 | def ROE1(ROE): 308 | if not ROE is None: gb=6.37555 309 | if ROE<0: gb=6.21541 310 | if ROE>=0 and ROE < 1: gb=0.020408 311 | if ROE >= 1 : gb = 0.01908 312 | return gb 313 | 314 | #市净率 315 | def PB_1(pb): 316 | if not pb is None: gb = 0.020408 317 | if pb < 1.28: gb = 18.8844 318 | if pb >= 1.28 and pb < 1.52: gb = 21.428206 319 | if pb >= 1.52 and pb < 1.74: gb = 16.631003 320 | if pb >= 1.74 and pb < 2.19: gb = 20.34075 321 | if pb >= 2.19 and pb < 2.68: gb = 14.47257 322 | if pb >= 2.68 and pb < 3.48: gb = 29.04896 323 | if pb >= 3.48 and pb < 5.15: gb = 27.82325 324 | if pb < big: gb = 37.95336 325 | return gb 326 | 327 | #市盈率 328 | def PE_1(PE): 329 | if not PE is None: gb=0.020408 330 | if PE < 1 : gb=89.73698 331 | if PE>= 1 and PE < 14.35 : gb=20.73637 332 | if PE>=14.35 and PE < 17.48 : gb=14.0666 333 | if PE>= 17.48 and PE <20.69 : gb=10.25078 334 | if PE>= 20.69 and PE < 28.93 : gb=14.3363 335 | if PE>=28.93 and PE < 38.85 : gb=9.7639 336 | if PE>= 38.85 and PE < 54.79 : gb=22.97599 337 | if PE>=54.79 and PE < 112.89 : gb=15.97741 338 | if PE < big: gb=21.648014 339 | return gb 340 | 341 | #投资收益 342 | def Investmentincome1(Investmentincome): 343 | if not Investmentincome is None: gb = 0.18701 344 | if Investmentincome < 28: gb= 1.1904 345 | if Investmentincome>=28 and Investmentincome <120 : gb=0.020408 346 | if Investmentincome>=120 and Investmentincome <287 : gb=11.9747 347 | if Investmentincome>= 287 and Investmentincome <709 : gb=1.61483 348 | if Investmentincome>= 709 and Investmentincome <2317 : gb=3.827008 349 | if Investmentincome>= 2317 and Investmentincome < 7014 : gb=6.2092 350 | if Investmentincome>=7014 and Investmentincome < big : gb=5.35745 351 | return gb 352 | 353 | #每股公积金 354 | def Persharereserve1(Persharereserve): 355 | if not Persharereserve is None: gb=12.91906 356 | if Persharereserve < 1 : gb=8.041266 357 | if Persharereserve>= 1 and Persharereserve <1.25 : gb=3.038358 358 | if 
Persharereserve>= 1.25 and Persharereserve <1.736 : gb=10.59354 359 | if Persharereserve>= 1.736 and Persharereserve < 2.001 : gb=5.60317 360 | if Persharereserve>= 2.001 and Persharereserve < 2.332 : gb=6.54077 361 | if Persharereserve>=2.332 and Persharereserve < 2.647 : gb=3.06069 362 | if Persharereserve>= 2.647 and Persharereserve <3.67 : gb=4.45335 363 | if Persharereserve>= 3.67 and Persharereserve < 4.944 : gb=4.3002 364 | if Persharereserve>= 4.944 and Persharereserve < big: gb=0.020408 365 | return gb 366 | 367 | #每股净资产 368 | def Netassetspershare1(Netassetspershare): 369 | if not Netassetspershare is None: gb = 0.0001 370 | if Netassetspershare <1.967 : gb=0.020408 371 | if Netassetspershare>= 1.967 and Netassetspershare <2.828 : gb=4.2333 372 | if Netassetspershare>= 2.828 and Netassetspershare <3.28 : gb=0.6359 373 | if Netassetspershare>= 3.28 and Netassetspershare < 4.03 : gb=7.94902 374 | if Netassetspershare>= 4.03 and Netassetspershare < 4.628 : gb=12.5024 375 | if Netassetspershare>= 4.628 and Netassetspershare < 5.389 : gb=4.53942 376 | if Netassetspershare>= 5.389 and Netassetspershare < 6.368 : gb=9.01132 377 | if Netassetspershare>= 6.368 and Netassetspershare < 9.255 : gb=19.718074 378 | if Netassetspershare>= 9.255 and Netassetspershare < big : gb=31.99056 379 | return gb 380 | 381 | #每股收益 382 | def EPS1(EPS): 383 | if not EPS is None: gb=66.897076 384 | if EPS<0: gb=0.020408 385 | if EPS>= 0 and EPS < big : gb=101.96149 386 | return gb 387 | 388 | #每股经营现金 389 | def Operatingcashpershare1(Operatingcashpershare): 390 | if not Operatingcashpershare is None: gb = 0.0001 391 | if Operatingcashpershare<0 : gb=1.74793 392 | if Operatingcashpershare>= 0 and Operatingcashpershare <1: gb=3.479613 393 | if Operatingcashpershare>= 1 and Operatingcashpershare < big: gb=0.020408 394 | return gb 395 | 396 | #流动负债 397 | def Currentliability1(Currentliability): 398 | if not Currentliability is None : gb=0.020408 399 | if Currentliability < 17749: gb=42.657299 400 | if Currentliability>= 17749 and Currentliability < 41872 : gb= 31.37411 401 | if Currentliability>= 41872 and Currentliability < 67805 : gb=35.86446 402 | if Currentliability>= 67805 and Currentliability < 94403 : gb=28.40803 403 | if Currentliability>= 94403 and Currentliability < 118701 : gb=33.06355 404 | if Currentliability>= 118701 and Currentliability < 154040 : gb=22.87249 405 | if Currentliability>= 154040 and Currentliability < 293731 : gb=26.96313 406 | if Currentliability>= 293731 and Currentliability < big : gb=19.97311 407 | return gb 408 | 409 | #经营现金流量 410 | def cashflow1(cashflow): 411 | if not cashflow is None: gb = 0.0001 412 | if cashflow<1: gb=2.035205 413 | if cashflow >=1 and cashflow < 5567 : gb=0.24589 414 | if cashflow >= 5567 and cashflow <12232 : gb=4.45852 415 | if cashflow >= 12232 and cashflow < 18996 : gb=0.020408 416 | if cashflow >= 18996 and cashflow < 34002 : gb=1.02909 417 | if cashflow >= 34002 and cashflow < 82981 : gb=2.18616 418 | if cashflow >= 82981 and cashflow < big : gb=8.71717 419 | return gb 420 | 421 | #股东权益 422 | def stockholderequity1(stockholderequity): 423 | if not stockholderequity is None: gb = 0.0001 424 | if stockholderequity < 52676 : gb=2.19407 425 | if stockholderequity>= 52676 and stockholderequity <102078: gb=0.020408 426 | if stockholderequity>= 102078 and stockholderequity <144376: gb=8.1202 427 | if stockholderequity>= 144376 and stockholderequity < 181280: gb=10.86791 428 | if stockholderequity>= 181280 and stockholderequity <278681: gb=7.95683 429 | if 
stockholderequity>= 278681 and stockholderequity < 453098: gb=9.81188 430 | if stockholderequity>= 453098 and stockholderequity < 652440: gb=11.1213 431 | if stockholderequity>=652440 and stockholderequity <1230921: gb=11.12022 432 | if stockholderequity>=1230921 and stockholderequity < big : gb=15.01879 433 | return gb 434 | 435 | #股东权益比 436 | def Shareholdersequityratio1(Shareholdersequityratio): 437 | if not Shareholdersequityratio is None: gb = 0.0001 438 | if Shareholdersequityratio < 29.101 : gb=0.020408 439 | if Shareholdersequityratio>= 29.101 and Shareholdersequityratio < 37.261 : gb=12.81002 440 | if Shareholdersequityratio>= 37.261 and Shareholdersequityratio <45.082 : gb=10.53649 441 | if Shareholdersequityratio>= 45.082 and Shareholdersequityratio < 53.413 : gb=5.59867 442 | if Shareholdersequityratio>= 53.413 and Shareholdersequityratio <60.846 : gb=7.77582 443 | if Shareholdersequityratio>= 60.846 and Shareholdersequityratio < 65.61 : gb=5.17068 444 | if Shareholdersequityratio>= 65.61 and Shareholdersequityratio < 73.516 : gb=11.43853 445 | if Shareholdersequityratio>= 73.516 and Shareholdersequityratio < 86.425 : gb=1.13651 446 | if Shareholdersequityratio>= 86.425 and Shareholdersequityratio < big : gb=6.14852 447 | return gb 448 | 449 | #营业利润 450 | def operatingprofit1(operatingprofit): 451 | if not operatingprofit is None: gb = 0.0001 452 | if operatingprofit<1: gb=12.06823 453 | if operatingprofit>= 1 and operatingprofit < 2177 : gb=0.0204081 454 | if operatingprofit>= 2177 and operatingprofit < 3880 : gb=7.51031 455 | if operatingprofit>= 3880 and operatingprofit < 8680 : gb=7.701602 456 | if operatingprofit>= 8680 and operatingprofit < 15406 : gb=15.90353 457 | if operatingprofit>= 15406 and operatingprofit < 48966 : gb=13.67607 458 | if operatingprofit>= 48966 and operatingprofit = 15182 and capitalsurplus < 24162 : gb=3.06527 466 | if capitalsurplus>= 24162 and capitalsurplus < 42580 : gb=11.183809 467 | if capitalsurplus>= 42580 and capitalsurplus < 63141 : gb=22.89289 468 | if capitalsurplus>= 63141 and capitalsurplus < 83469 : gb=10.07029 469 | if capitalsurplus>= 83469 and capitalsurplus < 130700 : gb=16.21322 470 | if capitalsurplus>= 130700 and capitalsurplus < 192784 : gb=19.32758 471 | if capitalsurplus>= 192784 and capitalsurplus < 437172 : gb=19.224904 472 | if capitalsurplus>= 437172 and capitalsurplus < big: gb=18.28482 473 | return gb 474 | 475 | #销售毛利率 476 | def grossprofitratio1(grossprofitratio): 477 | if not grossprofitratio is None: gb = 0.0001 478 | if grossprofitratio < 14.93 : gb=13.35899 479 | if grossprofitratio>= 14.93 and grossprofitratio < 24.901 : gb=7.53092 480 | if grossprofitratio>= 24.901 and grossprofitratio < 29.444 : gb=0.657776 481 | if grossprofitratio>= 29.444 and grossprofitratio < 34.152 : gb=9.190005 482 | if grossprofitratio>= 34.152 and grossprofitratio < 38.541 : gb=0.020408 483 | if grossprofitratio>= 38.541 and grossprofitratio < 43.684 : gb=15.0763 484 | if grossprofitratio>= 43.684 and grossprofitratio < 61.368 : gb=7.81706 485 | if grossprofitratio>= 61.368 and grossprofitratio < big : gb=9.12169 486 | return gb 487 | 488 | #长期负债 489 | def longtermdebt1(longtermdebt): 490 | if not longtermdebt is None: gb = 0.0001 491 | if longtermdebt is None: gb=16.271027 492 | if longtermdebt < 1467 : gb=6.670008 493 | if longtermdebt>= 1467 and longtermdebt < 3329 : gb=0.020408 494 | if longtermdebt>= 3329 and longtermdebt < 12102 : gb=0.32268 495 | if longtermdebt>= 12102 and longtermdebt < 21541 : gb=3.54764 496 | if 
longtermdebt>= 21541 and longtermdebt < 60391 : gb=3.88658 497 | if longtermdebt>= 60391 and longtermdebt < 150453 : gb=4.770717 498 | if longtermdebt>= 150453 and longtermdebt = 72943 and volumeoftransaction < 99911.91 : gb=13.92472 506 | if volumeoftransaction >= 99911.91 and volumeoftransaction < 141196 : gb=3.67683 507 | if volumeoftransaction >= 141196 and volumeoftransaction < 213835 : gb=20.59017 508 | if volumeoftransaction >= 213835 and volumeoftransaction < 276842 : gb=18.98813 509 | if volumeoftransaction >= 276842 and volumeoftransaction < 346244 : gb=25.01875 510 | if volumeoftransaction >= 346244 and volumeoftransaction < 471774 : gb=19.66208 511 | if volumeoftransaction >= 471774 and volumeoftransaction < 989027 : gb=26.09589 512 | if volumeoftransaction >= 989027 and volumeoftransaction < 1484292 : gb=23.98660 513 | if volumeoftransaction >= 1484292 and volumeoftransaction < big: gb=35.2506 514 | return gb 515 | 516 | #阶段成交额 517 | def amountoftransaction1(amountoftransaction): 518 | if not amountoftransaction is None: gb = 0.0001 519 | if amountoftransaction < 7554 : gb=34.391644 520 | if amountoftransaction>= 7554 and amountoftransaction < 10711.1 : gb=26.6226 521 | if amountoftransaction>= 10711.1 and amountoftransaction < 17070.86 : gb=14.51471 522 | if amountoftransaction>= 17070.86 and amountoftransaction < 24375.01 : gb=22.23326 523 | if amountoftransaction>= 24375.01 and amountoftransaction < 44944.95: gb=15.27538 524 | if amountoftransaction>= 44944.95 and amountoftransaction < 85508.35 : gb=20.7352 525 | if amountoftransaction>= 85508.35 and amountoftransaction < 149157.36 : gb=9.90349 526 | if amountoftransaction>= 149157.36 and amountoftransaction < big: gb=0.020408 527 | return gb 528 | 529 | #阶段涨幅 530 | def stagerange1(stagerange): 531 | if not stagerange is None: gb = 0.0001 532 | if stagerange<0 : gb=2.79856 533 | if stagerange>= 0 and stagerange < 1: gb=0.020408 534 | if stagerange >= 1 : gb = 0.029 535 | return gb 536 | 537 | -------------------------------------------------------------------------------- /graph/readme: -------------------------------------------------------------------------------- 1 | readme 2 | -------------------------------------------------------------------------------- /medical2last.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crystal-tensor/Finance-Knowledge-Graph/6dff59528ffcc93549c2ae364f36eff30e031dc5/medical2last.zip -------------------------------------------------------------------------------- /news20200103.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crystal-tensor/Finance-Knowledge-Graph/6dff59528ffcc93549c2ae364f36eff30e031dc5/news20200103.csv -------------------------------------------------------------------------------- /news20200103_2.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crystal-tensor/Finance-Knowledge-Graph/6dff59528ffcc93549c2ae364f36eff30e031dc5/news20200103_2.csv -------------------------------------------------------------------------------- /news_industry.py: -------------------------------------------------------------------------------- 1 | import re 2 | import csv 3 | import pandas as pd 4 | import tushare as ts 5 | 6 | # 创建“行业_股票”的关系 7 | stockList = [] 8 | data = pd.read_csv("myJob1/kg/stock.csv") 9 | for indexs in data.index: 10 | id = data.loc[indexs].values[0] 11 | code = 
str(data.loc[indexs].values[2]).zfill(6) 12 | print(id, code) 13 | stockList.append([id, code]) 14 | industryList = [] 15 | data = pd.read_csv("myJob1/kg/industry.csv") 16 | for indexs in data.index: 17 | id = data.loc[indexs].values[0] 18 | industry = data.loc[indexs].values[1] 19 | print(id, industry) 20 | industryList.append([id, industry]) 21 | stock_industry_list = [] 22 | data = pd.read_csv("myJob1/stock_industry_prep.csv") 23 | for indexs in data.index: 24 | stock = str(data.loc[indexs].values[0]).zfill(6) 25 | industry = data.loc[indexs].values[2] 26 | stock_id = "" 27 | for j in range(len(stockList)): 28 | if stock == stockList[j][1]: 29 | stock_id = stockList[j][0] 30 | break 31 | industry_id = "" 32 | for j in range(len(industryList)): 33 | if industry == industryList[j][1]: 34 | industry_id = industryList[j][0] 35 | break 36 | if stock_id!="" and industry_id!="": 37 | stock_industry_list.append([stock_id, industry_id, "行业属于", "行业属于"]) 38 | # 存储关系 39 | with open("myJob1/kg/news_industry.csv","w") as csvfile: 40 | writer = csv.writer(csvfile) 41 | writer.writerow([":START_ID", ":END_ID", "relation", ":TYPE"]) 42 | for i in range(len(stock_industry_list)): 43 | writer.writerows([stock_industry_list[i]]) 44 | print(i, stock_industry_list[i]) 45 | -------------------------------------------------------------------------------- /news_socre.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # coding: utf-8 3 | import numpy as np 4 | import pandas as pd 5 | import jieba 6 | from sklearn.feature_extraction.text import CountVectorizer 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.naive_bayes import MultinomialNB 9 | 10 | #csv_file = 'data1.csv' 11 | csv_file = 'news20200103_2.csv' 12 | f = open(csv_file, 'r', encoding="gbk", errors='ignore') ##encoding=u'utf-8', encoding='gbk' 13 | #f = open(csv_file, 'r', encoding="ISO-8859-1", errors='ignore') 14 | data = pd.read_csv(f) 15 | data.head() 16 | #t=pd.DataFrame(data['comment'].astype(str)) 17 | #print(t) 18 | #data = pd.read_csv('news20200103_2.csv',encoding="unicode_escape") 19 | 20 | #print(data.head()) 21 | def chinese_word_cut(mytext): 22 | return " ".join(jieba.cut(mytext)) 23 | data['cut_comment'] = data.comment.apply(chinese_word_cut) 24 | X = data['cut_comment'] 25 | print(X) 26 | y = data.sentiment 27 | 28 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=22) 29 | 30 | def get_custom_stopwords(stop_words_file): 31 | with open(stop_words_file) as f: 32 | stopwords = f.read() 33 | stopwords_list = stopwords.split('\n') 34 | custom_stopwords_list = [i for i in stopwords_list] 35 | return custom_stopwords_list 36 | 37 | stop_words_file = '哈工大停用词表.txt' 38 | 39 | stopwords = get_custom_stopwords(stop_words_file) 40 | 41 | vect = CountVectorizer(max_df = 0.8, 42 | min_df = 3, 43 | token_pattern=u'(?u)\\b[^\\d\\W]\\w+\\b', 44 | stop_words=frozenset(stopwords)) 45 | #print(vect) 46 | test = pd.DataFrame(vect.fit_transform(X_train).toarray(), columns=vect.get_feature_names()) 47 | test.head() 48 | 49 | nb = MultinomialNB() 50 | 51 | X_train_vect = vect.fit_transform(X_train) 52 | 53 | nb.fit(X_train_vect, y_train) 54 | train_score = nb.score(X_train_vect, y_train) 55 | print('准确率',train_score) 56 | 57 | # X_test_vect = vect.transform(X_test) 58 | # print(nb.score(X_test_vect, y_test)) 59 | 60 | X_vec = vect.transform(X) 61 | nb_result = nb.predict(X_vec) 62 | data['nb_result'] = nb_result 63 | 64 | 65 | 
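news_socre.py stops after attaching the Naive-Bayes prediction to the DataFrame; nothing is written to disk. A minimal follow-up sketch for persisting the scored news is shown below; the output file name is an assumption, not part of the repository.

# Hypothetical final step for news_socre.py: save the scored rows so later
# scripts can join on them. The output path is an assumption.
data[['comment', 'sentiment', 'nb_result']].to_csv(
    'news20200103_scored.csv', index=False, encoding='utf-8-sig')
print('saved', len(data), 'rows with nb_result')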
-------------------------------------------------------------------------------- /news_stock.py: -------------------------------------------------------------------------------- 1 | import re 2 | import csv 3 | import pandas as pd 4 | import tushare as ts 5 | 6 | # 创建“概念_股票”的关系 7 | stockList = [] 8 | data = pd.read_csv("myJob1/kg/stock.csv") 9 | for indexs in data.index: 10 | id = data.loc[indexs].values[0] 11 | code = str(data.loc[indexs].values[2]).zfill(6) 12 | print(id, code) 13 | stockList.append([id, code]) 14 | conceptList = [] 15 | data = pd.read_csv("myJob1/kg/concept.csv") 16 | for indexs in data.index: 17 | id = data.loc[indexs].values[0] 18 | concept = data.loc[indexs].values[1] 19 | print(id, concept) 20 | conceptList.append([id, concept]) 21 | stock_concept_list = [] 22 | data = pd.read_csv("myJob1/stock_concept_prep.csv") 23 | for indexs in data.index: 24 | stock = str(data.loc[indexs].values[0]).zfill(6) 25 | concept = data.loc[indexs].values[2] 26 | stock_id = "" 27 | for j in range(len(stockList)): 28 | if stock == stockList[j][1]: 29 | stock_id = stockList[j][0] 30 | break 31 | concept_id = "" 32 | for j in range(len(conceptList)): 33 | if concept == conceptList[j][1]: 34 | concept_id = conceptList[j][0] 35 | break 36 | if stock_id!="" and concept_id!="": 37 | stock_concept_list.append([stock_id, concept_id, "概念属于", "概念属于"]) -------------------------------------------------------------------------------- /test1.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | from bs4 import BeautifulSoup 4 | 5 | def file_name(file_dir): 6 | for root, dirs, files in os.walk(file_dir): 7 | return files 8 | 9 | csvPath = "myJob1/executive_prep.csv" 10 | 11 | list_job = [] 12 | list_sex = [] 13 | list_age = [] 14 | list_code = [] 15 | list_name = [] 16 | 17 | # 解析html 18 | file_dir = "target" 19 | files = file_name(file_dir) 20 | for i in range(len(files)): 21 | print(i, files[i]) 22 | if ".html" in files[i]: 23 | htmlPath = "target/"+files[i] 24 | htmlfile = open(htmlPath, 'r', encoding="gbk") 25 | htmlpage = htmlfile.read() 26 | 27 | soup = BeautifulSoup(htmlpage, "html.parser") 28 | code = soup.title.string.split(" ")[0].split("(")[1][:-1] 29 | 30 | body_tag = soup.body 31 | 32 | try: 33 | # 获取序号 34 | body_tag1 = body_tag.find("div", class_="m_tab_content", id="ml_001").find("tbody").find_all("th", class_="tc") 35 | for value in body_tag1: 36 | list_code.append(str(code)) 37 | 38 | # 获取姓名 39 | body_tag2 = body_tag.find("div", class_="m_tab_content", id="ml_001").find("tbody").find_all("a", class_="turnto") 40 | for value in body_tag2: 41 | list_name.append(value.string) 42 | 43 | # 获取职务 44 | body_tag3 = body_tag.find("div", class_="m_tab_content", id="ml_001").find("tbody").find_all("td", class_="jobs") 45 | for value in body_tag3: 46 | list_job.append(value.string) 47 | 48 | # 获取性别、年龄 49 | body_tag4 = body_tag.find("div", class_="m_tab_content", id="ml_001").find("tbody").find_all("td", class_="intro") 50 | for value in body_tag4: 51 | if(len(value.string)>0): 52 | if(len(value.string.split(" "))>1): 53 | list_sex.append(value.string.split(" ")[0]) 54 | list_age.append(value.string.split(" ")[1]) 55 | else: 56 | list_sex.append("无") 57 | list_age.append("无") 58 | except: 59 | print(htmlPath+"————error") 60 | 61 | print(len(list_name), len(list_sex), len(list_age), len(list_code), len(list_job)) 62 | 63 | # for i in range(len(list_code)): 64 | # print(list_name[i], list_sex[i], list_age[i], list_code[i], 
list_job[i]) 65 | 66 | # 写入csv文件 67 | dataframe = pd.DataFrame({'高管姓名':list_name, '性别':list_sex, '年龄':list_age, '股票代码':list_code, '职位':list_job}) 68 | columns = ['高管姓名','性别','年龄','股票代码', '职位'] 69 | dataframe.to_csv(csvPath, index=False, columns=columns) -------------------------------------------------------------------------------- /test2.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tushare as ts 3 | 4 | # 获取股票行业信息 5 | df = ts.get_industry_classified() 6 | csvPath = "myJob1/stock_industry_prep.csv" 7 | df.to_csv(csvPath, index=False) 8 | 9 | # 获取股票概念信息 10 | df = ts.get_concept_classified() 11 | csvPath = "myJob1/stock_concept_prep.csv" 12 | df.to_csv(csvPath, index=False) 13 | #print(df) 14 | stockList = [] 15 | data = ts.get_stock_basics() 16 | for indexs in data.index: 17 | name = data.loc[indexs].values[1] 18 | code = str(data.loc[indexs].values[0]).zfill(6) 19 | stockList.append([indexs, name, code]) -------------------------------------------------------------------------------- /test3.py: -------------------------------------------------------------------------------- 1 | import re 2 | import csv 3 | import pandas as pd 4 | import tushare as ts 5 | 6 | # 读取“高管”实体 7 | stockList = [] 8 | data = ts.get_stock_basics() 9 | for indexs in data.index: 10 | stockList.append(indexs) 11 | executiveList = [] 12 | data = pd.read_csv("myJob1/executive_prep.csv") 13 | sign = 0 14 | for indexs in data.index: 15 | name = data.loc[indexs].values[0] 16 | sex = data.loc[indexs].values[1] 17 | age = data.loc[indexs].values[2] 18 | code = str(data.loc[indexs].values[3]).zfill(6) 19 | job = data.loc[indexs].values[4] 20 | if code in stockList: 21 | sign = sign + 1 22 | executiveList.append([100000+sign, name, sex, age, code, job, "高管"]) 23 | with open("myJob1/kg/executive.csv","w") as csvfile: 24 | writer = csv.writer(csvfile) 25 | writer.writerow(["index:ID", "name", "sex", "age", "code", "job", ":LABEL"]) 26 | for i in range(len(executiveList)): 27 | writer.writerows([executiveList[i]]) 28 | print(i, executiveList[i], len(stockList)) 29 | 30 | # 创建“公司”实体 31 | sign = 0 32 | stockList2 = [] 33 | ''' 34 | stockList = [] 35 | data = pd.read_csv("myJob1/stock_concept_prep.csv") 36 | for indexs in data.index: 37 | name = data.loc[indexs].values[1] 38 | code = str(data.loc[indexs].values[0]).zfill(6) 39 | if [name, code] not in stockList: 40 | stockList.append([name, code]) 41 | stockList2.append([sign, name, code]) 42 | sign = sign + 1 43 | ''' 44 | data = ts.get_stock_basics() 45 | for indexs in data.index: 46 | status = "normal" 47 | if "ST" in data.loc[indexs].values[0]: 48 | status = "ST" 49 | sign = sign + 1 50 | stockList2.append([200000+sign, data.loc[indexs].values[0], indexs, status, "企业"]) 51 | with open("myJob1/kg/stock.csv","w") as csvfile: 52 | writer = csv.writer(csvfile) 53 | writer.writerow(["index:ID", "name", "code", "status", ":LABEL"]) 54 | for i in range(len(stockList2)): 55 | writer.writerows([stockList2[i]]) 56 | print(i, stockList2[i]) 57 | 58 | # 创建“概念”实体 59 | conceptList = [] 60 | data = pd.read_csv("myJob1/stock_concept_prep.csv") 61 | for indexs in data.index: 62 | concept = data.loc[indexs].values[2] 63 | if concept not in conceptList: 64 | conceptList.append(concept) 65 | with open("myJob1/kg/concept.csv","w") as csvfile: 66 | writer = csv.writer(csvfile) 67 | writer.writerow(["index:ID", "name", ":LABEL"]) 68 | for i in range(len(conceptList)): 69 | writer.writerows([[300000+i+1, conceptList[i], "概念"]]) 70 | 
print(i, conceptList[i]) 71 | 72 | # 创建“行业”实体 73 | industryList = [] 74 | data = pd.read_csv("myJob1/stock_industry_prep.csv") 75 | for indexs in data.index: 76 | industry = data.loc[indexs].values[2] 77 | if industry not in industryList: 78 | industryList.append(industry) 79 | with open("myJob1/kg/industry.csv","w") as csvfile: 80 | writer = csv.writer(csvfile) 81 | writer.writerow(["index:ID", "name", ":LABEL"]) 82 | for i in range(len(industryList)): 83 | writer.writerows([[400000+i+1, industryList[i], "行业"]]) 84 | print(i, industryList[i]) 85 | 86 | # 创建”高管“和”公司“的关系 87 | executiveList = [] 88 | codeList = [] 89 | data = pd.read_csv("myJob1/kg/executive.csv") 90 | for indexs in data.index: 91 | index = data.loc[indexs].values[0] 92 | name = data.loc[indexs].values[1] 93 | code = str(data.loc[indexs].values[4]).zfill(6) 94 | job = data.loc[indexs].values[5] 95 | if code not in codeList: 96 | codeList.append(code) 97 | executiveList.append([index, name, code, job]) 98 | print("done...") 99 | stockList = [] 100 | data = pd.read_csv("myJob1/kg/stock.csv") 101 | for indexs in data.index: 102 | index = data.loc[indexs].values[0] 103 | name = data.loc[indexs].values[1] 104 | code = str(data.loc[indexs].values[2]).zfill(6) 105 | stockList.append([index, name, code]) 106 | print("done...") 107 | # 双重循环 108 | execute_stock_List = [] 109 | num = 0 110 | for i in range(len(executiveList)): 111 | sign = "" 112 | for j in range(len(stockList)): 113 | if str(executiveList[i][2]) == str(stockList[j][2]): 114 | str1 = re.sub('"','', executiveList[i][3]) 115 | execute_stock_List.append([executiveList[i][0], stockList[j][0], str1, "董事会成员"]) 116 | sign = "1" 117 | break 118 | if len(sign) == 0: 119 | num = num + 1 120 | print(num, str(executiveList[i][0])+" , "+str(executiveList[i][2]), len(stockList), len(executiveList[i][2])) 121 | print("done...") 122 | # 存储去重后股票 123 | with open("myJob1/kg/executive_stock.csv","w") as csvfile: 124 | writer = csv.writer(csvfile) 125 | writer.writerow([":START_ID", ":END_ID", "relation", ":TYPE"]) 126 | for i in range(len(execute_stock_List)): 127 | writer.writerows([execute_stock_List[i]]) 128 | print(i, execute_stock_List[i]) 129 | 130 | # 创建“行业_股票”的关系 131 | stockList = [] 132 | data = pd.read_csv("myJob1/kg/stock.csv") 133 | for indexs in data.index: 134 | id = data.loc[indexs].values[0] 135 | code = str(data.loc[indexs].values[2]).zfill(6) 136 | print(id, code) 137 | stockList.append([id, code]) 138 | industryList = [] 139 | data = pd.read_csv("myJob1/kg/industry.csv") 140 | for indexs in data.index: 141 | id = data.loc[indexs].values[0] 142 | industry = data.loc[indexs].values[1] 143 | print(id, industry) 144 | industryList.append([id, industry]) 145 | stock_industry_list = [] 146 | data = pd.read_csv("myJob1/stock_industry_prep.csv") 147 | for indexs in data.index: 148 | stock = str(data.loc[indexs].values[0]).zfill(6) 149 | industry = data.loc[indexs].values[2] 150 | stock_id = "" 151 | for j in range(len(stockList)): 152 | if stock == stockList[j][1]: 153 | stock_id = stockList[j][0] 154 | break 155 | industry_id = "" 156 | for j in range(len(industryList)): 157 | if industry == industryList[j][1]: 158 | industry_id = industryList[j][0] 159 | break 160 | if stock_id!="" and industry_id!="": 161 | stock_industry_list.append([stock_id, industry_id, "行业属于", "行业属于"]) 162 | # 存储关系 163 | with open("myJob1/kg/stock_industry.csv","w") as csvfile: 164 | writer = csv.writer(csvfile) 165 | writer.writerow([":START_ID", ":END_ID", "relation", ":TYPE"]) 166 | for i in 
range(len(stock_industry_list)): 167 | writer.writerows([stock_industry_list[i]]) 168 | print(i, stock_industry_list[i]) 169 | 170 | # 创建“概念_股票”的关系 171 | stockList = [] 172 | data = pd.read_csv("myJob1/kg/stock.csv") 173 | for indexs in data.index: 174 | id = data.loc[indexs].values[0] 175 | code = str(data.loc[indexs].values[2]).zfill(6) 176 | print(id, code) 177 | stockList.append([id, code]) 178 | conceptList = [] 179 | data = pd.read_csv("myJob1/kg/concept.csv") 180 | for indexs in data.index: 181 | id = data.loc[indexs].values[0] 182 | concept = data.loc[indexs].values[1] 183 | print(id, concept) 184 | conceptList.append([id, concept]) 185 | stock_concept_list = [] 186 | data = pd.read_csv("myJob1/stock_concept_prep.csv") 187 | for indexs in data.index: 188 | stock = str(data.loc[indexs].values[0]).zfill(6) 189 | concept = data.loc[indexs].values[2] 190 | stock_id = "" 191 | for j in range(len(stockList)): 192 | if stock == stockList[j][1]: 193 | stock_id = stockList[j][0] 194 | break 195 | concept_id = "" 196 | for j in range(len(conceptList)): 197 | if concept == conceptList[j][1]: 198 | concept_id = conceptList[j][0] 199 | break 200 | if stock_id!="" and concept_id!="": 201 | stock_concept_list.append([stock_id, concept_id, "概念属于", "概念属于"]) 202 | # 存储关系 203 | with open("myJob1/kg/stock_concept.csv","w") as csvfile: 204 | writer = csv.writer(csvfile) 205 | writer.writerow([":START_ID", ":END_ID", "relation", ":TYPE"]) 206 | for i in range(len(stock_concept_list)): 207 | writer.writerows([stock_concept_list[i]]) 208 | print(i, stock_concept_list[i]) -------------------------------------------------------------------------------- /test4.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | zfc = '"董事长" ' 4 | 5 | k=re.sub('"','',zfc) 6 | 7 | print(k) -------------------------------------------------------------------------------- /translatetxt.py: -------------------------------------------------------------------------------- 1 | f = open('jingyongnovel2.txt') 2 | o = open('medical13.txt', 'a') 3 | sourceInLines = str(f.readlines()) 4 | str2=sourceInLines.replace("\\\'", "'") 5 | o.write(str2) 6 | -------------------------------------------------------------------------------- /translatetxt2.py: -------------------------------------------------------------------------------- 1 | #coding=utf-8 import jieba import jieba.posseg as pseg import time t1=time.time() f=open("t_with_splitter.txt","r") #读取文本 string=f.read().decode("utf-8") words = pseg.cut(string) #进行分词 result="" #记录最终结果的变量 for w in words: result+= str(w.word)+"/"+str(w.flag) #加词性标注 f=open("t_with_POS_tag.txt","w") #将结果保存到另一个文档中 f.write(result) f.close() t2=time.time() print("分词及词性标注完成,耗时:"+str(t2-t1)+"秒。") #反馈结果 2 | -------------------------------------------------------------------------------- /哈工大停用词表.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crystal-tensor/Finance-Knowledge-Graph/6dff59528ffcc93549c2ae364f36eff30e031dc5/哈工大停用词表.txt -------------------------------------------------------------------------------- /投资决策流程及模型部署文件的生成.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crystal-tensor/Finance-Knowledge-Graph/6dff59528ffcc93549c2ae364f36eff30e031dc5/投资决策流程及模型部署文件的生成.pdf -------------------------------------------------------------------------------- /股票择时投资策略2.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/crystal-tensor/Finance-Knowledge-Graph/6dff59528ffcc93549c2ae364f36eff30e031dc5/股票择时投资策略2.pdf --------------------------------------------------------------------------------
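The kg/*.csv files written by test3.py and news_industry.py use Neo4j's bulk-import header conventions (index:ID, :LABEL, :START_ID, :END_ID, :TYPE), which suggests they are intended for the neo4j-admin importer. A minimal sketch of that step is below; it assumes a local, empty Neo4j installation with neo4j-admin on PATH, and the exact flag syntax differs between Neo4j versions.

# Hypothetical import step: bulk-load the generated node and relationship CSVs.
# Assumes an empty local Neo4j database; adjust flags for your Neo4j version.
import subprocess

subprocess.run([
    "neo4j-admin", "import",
    "--nodes=myJob1/kg/executive.csv",
    "--nodes=myJob1/kg/stock.csv",
    "--nodes=myJob1/kg/concept.csv",
    "--nodes=myJob1/kg/industry.csv",
    "--relationships=myJob1/kg/executive_stock.csv",
    "--relationships=myJob1/kg/stock_industry.csv",
    "--relationships=myJob1/kg/stock_concept.csv",
    "--relationships=myJob1/kg/news_industry.csv",
], check=True)

The :TYPE column in the relationship files already carries the relation names written by the scripts (董事会成员, 行业属于, 概念属于), so the importer picks the relationship types up directly from the CSVs.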