├── .gitignore ├── README.md ├── bert ├── Dockerfile └── bert-start.sh └── elastic ├── create_document.py ├── create_index.py ├── elastic.py ├── example.csv ├── example.json1 ├── index_config.json └── index_documents.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kelvin-jose/elasticbert/24e47c173bbe02e6bb96d2422b72a78004df401a/.gitignore -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # elasticbert 2 | Find more about the installations and setups here https://medium.com/@kelvinjose/elasticbert-information-retrieval-using-bert-and-elasticsearch-51fef465b9ae 3 | -------------------------------------------------------------------------------- /bert/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM tensorflow/tensorflow:1.12.0-py3 2 | 3 | RUN pip install --no-cache-dir bert-serving-server 4 | 5 | COPY . /bert 6 | 7 | WORKDIR /bert 8 | 9 | RUN chmod +x bert-start.sh 10 | 11 | ENTRYPOINT ["./bert-start.sh"] 12 | -------------------------------------------------------------------------------- /bert/bert-start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | bert-serving-start -num_worker=1 -model_dir model/cased_L-12_H-768_A-12/ 3 | -------------------------------------------------------------------------------- /elastic/create_document.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | This script creates documents in the required format for 4 | indexing. 
import json
from argparse import ArgumentParser

from pandas import read_csv

# Shared BERT client, created on first use by _get_client().  Constructing it
# at import time ran the BertClient constructor before the command line was
# parsed, so even `--help` depended on the BERT server.
bc = None


def _get_client():
    """Return the shared BertClient, creating it on the first call."""
    global bc
    if bc is None:
        # Local import: keeps the pure helpers below importable on machines
        # where bert-serving-client is not installed.
        from bert_serving.client import BertClient
        bc = BertClient(output_fmt='list', check_length=False)
    return bc


def create_document(doc, emb, index_name):
    """Build one output record for a document and its embedding.

    Args:
        doc: mapping with 'title' and 'abstract' keys.
        emb: embedding of the abstract; stored under 'abstract_vector'.
        index_name: value stored under '_index'.

    Returns:
        A dict with the keys '_op_type' (always 'index'), '_index', 'title',
        'abstract' and 'abstract_vector'.
        NOTE(review): this looks like an action for the Elasticsearch bulk
        helper, but the consumer (index_documents.py) is not visible here.
    """
    return {
        '_op_type': 'index',
        '_index': index_name,
        'title': doc['title'],
        'abstract': doc['abstract'],
        'abstract_vector': emb
    }


def load_dataset(path):
    """Read the CSV at *path* and return its rows as a list of dicts.

    Only the 'title' and 'abstract' columns are kept; each row becomes
    {'title': ..., 'abstract': ...}, in file order.

    Raises:
        KeyError: if either column is missing from the CSV.
    """
    df = read_csv(path)
    # Explicit column selection instead of attribute access on each row
    # Series, which depends on the column name not clashing with a Series
    # attribute.
    return df[['title', 'abstract']].to_dict('records')


def bulk_predict(docs, batch_size=256):
    """Yield one embedding per document, encoding abstracts in batches.

    Args:
        docs: list of dicts with an 'abstract' key.
        batch_size: number of abstracts sent to the BERT client per call.

    Yields:
        The embeddings returned by the client, in the same order as *docs*.
    """
    for start in range(0, len(docs), batch_size):
        batch_docs = docs[start:start + batch_size]
        # The client is only requested when there is something to encode.
        embeddings = _get_client().encode([doc['abstract'] for doc in batch_docs])
        for emb in embeddings:
            yield emb


def main(args):
    """Write one JSON document per CSV row to ``args.output``.

    Reads ``args.csv``, embeds every abstract and writes the records built by
    create_document() for index ``args.index``, one JSON object per line.
    """
    docs = load_dataset(args.csv)
    with open(args.output, 'w') as f:
        for doc, emb in zip(docs, bulk_predict(docs)):
            d = create_document(doc, emb, args.index)
            f.write(json.dumps(d) + '\n')


if __name__ == "__main__":

    parser = ArgumentParser()
    parser.add_argument('--index', required=True, help='name of the ES index')
    parser.add_argument('--csv', required=True, help='path to the input csv file')
    parser.add_argument('--output', required=True, help='name of the output file (example.json1)')
    args = parser.parse_args()
    main(args)

# Remainder of the original source line (header and docstring opening of the
# next file in this dump), kept verbatim:
# -------------------------------------------------------------------------------- /elastic/create_index.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | This file creates an index in the elasticsearch from a config file.
from json import load
from argparse import ArgumentParser
from elasticsearch import Elasticsearch

es = Elasticsearch('localhost:9200')


def create_index(index, config):
    """Create the Elasticsearch index *index* from a JSON config file.

    Args:
        index: name of the index to create.
        config: path to a JSON file; its parsed content is passed as the
            request body of ``es.indices.create``.

    Prints an [INFO] line on success.  Any failure (unreadable file, invalid
    JSON, Elasticsearch error) is reported as a [WARNING] line that names the
    exception; nothing is raised, so the script stays best-effort.
    """
    try:
        with open(config) as file:
            body = load(file)
        es.indices.create(index=index, body=body)
    except Exception as e:
        # Top-level CLI boundary: keep going, but say what actually failed
        # instead of hiding the cause behind a generic message.
        print("[WARNING] some exception has occurred! ({}: {})".format(type(e).__name__, e))
    else:
        print("[INFO] index " + index + " has been created!")


if __name__ == "__main__":

    parser = ArgumentParser()
    parser.add_argument('--index', required=True, help='name of the ES index')
    parser.add_argument('--config', required=True, help='path to the ES mapping config')
    args = parser.parse_args()
    create_index(args.index, args.config)

# --------------------------------------------------------------------------------
# /elastic/elastic.py:
# --------------------------------------------------------------------------------
# Flat script: embeds a fixed query with BERT and prints the best-scoring
# documents of the 'researchgate' index.
from elasticsearch import Elasticsearch
from bert_serving.client import BertClient
# NOTE: this ConnectionError is the Elasticsearch exception and shadows the
# builtin of the same name for the rest of the script.
from elasticsearch.exceptions import ConnectionError, NotFoundError

# total number of responses
SEARCH_SIZE = 1

# establishing connections
bc = BertClient(ip='localhost', output_fmt='list', check_length=False)
client = Elasticsearch('localhost:9200')

# this query is used as the search term, feel free to change
query = 'machine learning'
# encode() takes a list of sentences; only one is sent, so take the first result
query_vector = bc.encode([query])[0]

# Score every document (match_all) by the similarity between the query vector
# and the stored 'abstract_vector' field.
# NOTE(review): the "+ 1.0" presumably keeps the score non-negative, as
# Elasticsearch requires of script scores - confirm against the ES docs.
script_query = {
    "script_score": {
        "query": {"match_all": {}},
        "script": {
            "source": "cosineSimilarity(params.query_vector, doc['abstract_vector']) + 1.0",
            "params": {"query_vector": query_vector}
        }
    }
}

try:
    response = client.search(
        index='researchgate',  # name of the index
        body={
            "size": SEARCH_SIZE,
            "query": script_query,
            "_source": {"includes": ["title", "abstract"]}
        }
    )
    print(response)
except ConnectionError:
    print("[WARNING] docker isn't up and running!")
except NotFoundError:
    print("[WARNING] no such index!")

# Remainder of the original source line (start of the next file in this dump),
# kept verbatim so no data is lost:
# -------------------------------------------------------------------------------- /elastic/example.csv: -------------------------------------------------------------------------------- 1 | "title","abstract" 2 | "Similarity Search - The Metric Space Approach","In the Information Society, information holds the master key to economic influence. Similarity Search: The Metric Space Approach will focus on efficient ways to locate user-relevant information in collections of objects, the similarity of which is quantified using a pairwise distance measure. This book is a direct response to recent advances in computing, communications and storage which have led to the current flood of digital libraries, data warehouses and the limitless heterogeneity of internet resources. Similarity Search: The Metric Space Approach will introduce state-of-the-art in developing index structures for searching complex data modeled as instances of a metric space. This book consists of two parts. Part 1 presents the metric search approach in a nutshell by defining the problem, describes major theoretical principals, and provides an extensive survey of specific techniques for a large range of applications. Part 2 concentrates on approaches particularly designed for searching in very large collections of data. Similarity Search: The Metric Space Approach is designed for a professional audience, composed of academic researchers as well as practitioners in industry. This book is also suitable as introductory material for graduate-level students in computer science." 3 | "Database Similarity Searches","With genome sequencing projects producing huge amounts of sequence data, database sequence similarity search has become a central tool in bioinformatics to identify potentially homologous sequences.
It is thus widely used as an initial step for sequence characterization and annotation, phylogeny, genomics, transcriptomics, and proteomics studies. Database similarity search is based upon sequence alignment methods also used in pairwise sequence comparison. Sequence alignment can be global (whole sequence alignment) or local (partial sequence alignment) and there are algorithms to find the optimal alignment given particular comparison criteria. However, as database searches require the comparison of the query sequence with every single sequence in the database, heuristic algorithms have been designed to reduce the time required to build an alignment that has a reasonable chance to be the best one. Such algorithms have been implemented as fast and efficient programs (Blast, FastA) available in different types to address different kinds of problems. After searching the appropriate database, similarity search programs produce a list of similar sequences and local alignments. These results should be carefully examined before coming to any conclusion, as many traps await the similarity seeker: paralogues, multidomain proteins, pseudogenes, etc. This chapter presents points that should always be kept in mind when performing database similarity searches for various goals. It ends with a practical example of sequence characterization from a single protein database search using Blast." 4 | "Application of Improved SAX Algorithm to QAR Flight Data","During describing, storing and retrieving such operations on QAR flight data, traditional SAX can’t overcome time series amplitude flex and timeline drift, so improved algorithm is proposed. QAR flight data will be divided into three stages and use algorithms to fill the cruise stage, thus allowing effective search for time series of different length. The experiment and item prove the feasibility and effectiveness. It increased greatly the efficiency of aircraft troubleshooting." 
5 | "Environmental bio-monitoring with high-throughput sequencing","There is much interest in using high-throughput DNA sequencing methodology to monitor microorganisms, complex plant and animal communities. However, there are experimental and analytical issues to consider before applying a sequencing technology, which was originally developed for genome projects, to ecological projects. Many of these issues have been highlighted by recent microbial studies. Understanding how high-throughput sequencing is best implemented is important for the interpretation of recent results and the success of future applications. Addressing complex biological questions with metagenomics requires the interaction of researchers who bring different skill sets to problem solving. Educators can help by nurturing a collaborative interdisciplinary approach to genome science, which is essential for effective problem solving. Educators are in a position to help students, teachers, the public and policy makers interpret the new knowledge that metagenomics brings. To do this, they need to understand, not only the excitement of the science but also the pitfalls and shortcomings of methodology and research designs. We review these issues and some of the research directions that are helping to move the field forward." 6 | "The influence of hashed fingerprints density on the machine learning methods performance","Computational techniques have become a vital part of today's drug discovery campaigns. Among a wide range of tools applied in this process, machine learning methods can be distinguished. They are used for instance in virtual screening (VS), where its role is to identify potentially active compounds out of large libraries of structures [1]. In order to enable the application of various learning algorithms in VS tasks, an appropriate representation of molecules is needed. 
One of the solutions comes from the hashed fingerprints, encoding the information about the structure in a form of a bit string [2]. Both length and density (the percentage of 1's) can be modified during hashed fingerprint generation, which (as it was already proved) influence the similarity searching process [3]. The aim of our study was to examine the impact of such fingerprint density on the performance of machine learning methods. A series of bit strings with different density values and of various lengths was generated by means of the RDKit software [4]. They were tested in classification tests of 5-HT 1A ligands, with the use of a set of algorithms (Naïve Bayes, SMO, Ibk, Decorate, Hyper-pipes, J48 and Random Forest), in order to determine an optimal values of the variables for machine learning experiments." 7 | -------------------------------------------------------------------------------- /elastic/example.json1: -------------------------------------------------------------------------------- 1 | {"_op_type": "index", "_index": "researchgate", "title": "Similarity Search - The Metric Space Approach", "abstract": "In the Information Society, information holds the master key to economic influence. Similarity Search: The Metric Space Approach will focus on efficient ways to locate user-relevant information in collections of objects, the similarity of which is quantified using a pairwise distance measure. This book is a direct response to recent advances in computing, communications and storage which have led to the current flood of digital libraries, data warehouses and the limitless heterogeneity of internet resources. Similarity Search: The Metric Space Approach will introduce state-of-the-art in developing index structures for searching complex data modeled as instances of a metric space. This book consists of two parts. 
Part 1 presents the metric search approach in a nutshell by defining the problem, describes major theoretical principals, and provides an extensive survey of specific techniques for a large range of applications. Part 2 concentrates on approaches particularly designed for searching in very large collections of data. Similarity Search: The Metric Space Approach is designed for a professional audience, composed of academic researchers as well as practitioners in industry. This book is also suitable as introductory material for graduate-level students in computer science.", "abstract_vector": [0.37637317180633545, -0.3312664330005646, -0.037558164447546005, 0.22292782366275787, 0.023478595539927483, -0.051287904381752014, -0.045021902769804, 0.5498619675636292, 0.2776307761669159, -0.23976480960845947, 0.0664098858833313, 0.07548315078020096, 0.37183424830436707, 0.3538104295730591, -0.1410691738128662, -0.12512221932411194, 0.21420174837112427, 0.2610425055027008, -0.24819938838481903, -0.05076759681105614, -0.05204439163208008, -0.29703009128570557, 0.00767618278041482, -0.22641120851039886, -0.254655659198761, -0.5391397476196289, 0.1899869590997696, 0.2073746621608734, -0.09152521193027496, 0.27246755361557007, 0.09463002532720566, 0.28475329279899597, -0.3520195782184601, 0.3907057046890259, -0.09424881637096405, -0.20415978133678436, -0.09855219721794128, -0.11976000666618347, -0.11242856830358505, 0.63429194688797, 0.571245551109314, -0.17297464609146118, 0.07494455575942993, -0.11779135465621948, 0.5049768686294556, -0.09770792722702026, 0.11289709806442261, 0.3533058166503906, -0.3817463219165802, 0.26528510451316833, 0.3332339823246002, -0.05768059194087982, -0.3355742394924164, 0.0748131200671196, -0.4341215193271637, 0.08879973739385605, -0.18184398114681244, 0.01873045228421688, -0.4506363570690155, 0.2789561450481415, -0.37764906883239746, -0.05343056097626686, 0.050283562391996384, 0.3734470009803772, 0.23543329536914825, 0.03390657529234886, 
-0.1803608536720276, -0.02904977835714817, 0.04594121500849724, -0.49390122294425964, 0.1522238552570343, -0.25774329900741577, -0.01078724768012762, 0.18988995254039764, -0.08354020118713379, -0.08357750624418259, 0.22399631142616272, -0.22519052028656006, -0.3286091089248657, -0.5160956382751465, 0.08146081119775772, -0.06934261322021484, 0.15808218717575073, 0.20580285787582397, 0.23545515537261963, 0.3434144854545593, -0.04061711207032204, 0.24240750074386597, 0.3686714470386505, 0.7698301076889038, 0.08626434206962585, -0.06781626492738724, 0.28611287474632263, -0.18641293048858643, 0.18157094717025757, 0.26350754499435425, -0.14574481546878815, 0.18521608412265778, -0.44025349617004395, 0.34823712706565857, 0.30711907148361206, -0.3117745816707611, -0.31577572226524353, -0.18590477108955383, 0.1877427101135254, 0.29912108182907104, -0.08861427009105682, -0.6326338052749634, -0.46814560890197754, -0.196376234292984, 0.5213150978088379, -0.5480417609214783, 0.3062138855457306, -0.04047388955950737, 0.12648004293441772, 0.12242832779884338, 0.09880907833576202, -0.13962773978710175, -0.2470845729112625, -0.13238783180713654, -0.15550127625465393, -0.0034930717665702105, -0.1659402698278427, -0.1938484013080597, 0.04823613539338112, -0.1992797702550888, -0.4290452301502228, 0.03781775385141373, 0.17359276115894318, -0.028090117499232292, -0.27383744716644287, -0.08358585089445114, -0.16952519118785858, 0.008009491488337517, -0.00821311678737402, -0.21921616792678833, -0.560157060623169, 0.1120852380990982, -0.7008047699928284, 0.11583032459020615, -0.03383281081914902, -0.4210756719112396, 0.073889821767807, -0.5730822086334229, 0.24524451792240143, 0.08055877685546875, -0.07926614582538605, 0.03747764229774475, -0.6013455390930176, 0.2677421271800995, 0.05402638018131256, 0.05228208377957344, 0.062047649174928665, 0.3074394762516022, 0.3815208375453949, -0.1759854555130005, -0.02999282069504261, -0.3942420184612274, 0.620818018913269, -0.29656895995140076, 
0.6861703991889954, -0.1364414095878601, 0.17758092284202576, 0.26478812098503113, 0.3022279739379883, 0.6450985670089722, 0.2121465653181076, 0.08087652921676636, 0.4155506491661072, -0.34020984172821045, 0.5727642178535461, 0.15727874636650085, -0.07695295661687851, 0.08673905581235886, 0.3857179880142212, 0.2087993621826172, 0.4610162675380707, -0.26944223046302795, 0.18861202895641327, -0.21826639771461487, 0.081814706325531, 0.23618610203266144, -0.04164295271039009, -0.702119767665863, 0.23733268678188324, 0.18943065404891968, -0.11502408981323242, -0.23592780530452728, 0.12969385087490082, 0.23692898452281952, 0.042463406920433044, 0.06565672159194946, 0.04645996913313866, 0.21139205992221832, -0.31844958662986755, 0.16735342144966125, -0.09812168031930923, -0.017899999395012856, 0.019268427044153214, -0.45949965715408325, -0.049205247312784195, -0.0458424910902977, 0.0026317972224205732, 0.5640268921852112, -0.23064976930618286, 0.058814167976379395, -0.6624391078948975, 0.251218318939209, 0.06977412849664688, -0.28719937801361084, -0.19370244443416595, -0.04009601101279259, -0.2787199318408966, -0.00393272889778018, -0.07797808945178986, 0.14460943639278412, -0.431630402803421, -0.08096493780612946, 0.24041473865509033, -0.12949208915233612, 0.26085832715034485, 0.10784631967544556, 0.18528488278388977, -0.2486056387424469, -0.15897047519683838, 0.06620000302791595, -0.03535784035921097, -0.08392345160245895, -0.0278727188706398, 0.11020291596651077, 0.343100905418396, 0.3119712769985199, -0.11055795848369598, 0.17988651990890503, -0.052375707775354385, -0.7074747681617737, -0.4767606854438782, -0.06010545417666435, 0.39298683404922485, -0.1418260633945465, -0.3535231649875641, 0.6277775168418884, -0.2334464192390442, 0.2967841923236847, -0.3155389726161957, 0.4586944580078125, 0.015864841639995575, 0.46217361092567444, -0.2445000857114792, -0.22620528936386108, 0.05476083606481552, -0.2592207193374634, -0.10416184365749359, -0.05461634695529938, 
-0.022692549973726273, -0.11745771765708923, -0.041685234755277634, -0.1052500531077385, 0.030121468007564545, 0.14889423549175262, 0.5434539914131165, 0.08953963220119476, -0.21421052515506744, -0.08618401736021042, -0.20196828246116638, -0.009307806380093098, -0.3563087582588196, -0.2143065631389618, 0.05612756311893463, -0.10622910410165787, 0.08755765110254288, -0.3548559248447418, 0.39843982458114624, 0.45052987337112427, 0.19782881438732147, -0.005942717660218477, 0.2914770841598511, -0.4394330680370331, 0.5024721622467041, 0.1226896271109581, 0.3482930362224579, 0.0008518669637851417, 0.06362207233905792, 0.1783483326435089, -0.6655495166778564, 0.052064526826143265, -0.15858297049999237, 0.05718228220939636, -0.09910430759191513, 0.04491952061653137, -0.20919539034366608, 0.5554954409599304, -0.12186705321073532, 0.10632539540529251, -0.09018377214670181, -0.23549146950244904, -0.9389384984970093, 0.00623319111764431, -0.33191677927970886, 0.4341689646244049, -0.14212389290332794, 0.1511937975883484, -0.2575627565383911, 0.14791998267173767, 0.08910457789897919, 0.05040477216243744, -0.05404872074723244, 0.20031996071338654, 0.03670444339513779, 0.01638953760266304, 0.7238962650299072, 0.35620513558387756, -0.4927418828010559, 0.36002254486083984, -0.2842026948928833, -0.5753090381622314, 0.17814873158931732, -0.18829752504825592, 0.01001303642988205, -0.05878477543592453, -0.13333483040332794, -0.08961296081542969, -0.3453878164291382, -0.19300201535224915, -0.31372708082199097, 0.3562842309474945, 0.2407991737127304, -0.33348995447158813, -0.013694029301404953, -0.27649563550949097, -0.09845637530088425, -0.325137734413147, 0.07183458656072617, -0.2766396999359131, -0.05778205767273903, 0.27810347080230713, 0.35214072465896606, 0.4466087222099304, -0.5825076103210449, 0.1323498785495758, 0.00439082458615303, 0.28876635432243347, -0.3523581027984619, 0.0003316587826702744, 0.1796637773513794, 0.6609200239181519, 0.23370815813541412, -0.24870462715625763, 
0.14677274227142334, -0.080341637134552, -0.7065988183021545, -0.6370348930358887, -0.49377918243408203, 0.3676913380622864, 1.212424874305725, 0.28117573261260986, -0.21784378588199615, -0.10887489467859268, 0.5160586833953857, 0.13905677199363708, 0.09955962002277374, -0.2621653974056244, -0.12474356591701508, 0.1295207142829895, 0.055658385157585144, -0.15764838457107544, 0.0178642887622118, 0.43990761041641235, 0.10252076387405396, -0.006368883419781923, 0.3269089460372925, 0.14051033556461334, 0.05834344029426575, -0.09068885445594788, -0.1073889508843422, -0.2784096300601959, -0.06586918979883194, 0.04075629636645317, 0.4846629202365875, -0.013287979178130627, 0.27237510681152344, 0.40230655670166016, 0.4504360258579254, -0.09186676144599915, 0.025807857513427734, -0.30385342240333557, 0.08765016496181488, -0.4505749046802521, 0.47648516297340393, 0.24880284070968628, -0.05835522338747978, -0.2007790505886078, 0.18914340436458588, 0.42445412278175354, 0.3070511519908905, 0.11367098987102509, -0.09339742362499237, -0.03866887092590332, -0.13206380605697632, -0.12212016433477402, -0.34287238121032715, 0.4046083390712738, -0.16029033064842224, -0.41995352506637573, -0.036483846604824066, -0.6122480034828186, -0.27311640977859497, 0.1806480437517166, -0.38412997126579285, 0.4879688322544098, -0.21019572019577026, -0.11126437783241272, 0.16572381556034088, -0.12773464620113373, 0.020883655175566673, -0.4979965090751648, 0.18341800570487976, -0.030094552785158157, 0.2114751785993576, 0.3118109703063965, 0.03168864548206329, 0.35231804847717285, 0.5869550108909607, 0.0024942916352301836, -0.45332664251327515, 0.08483868837356567, 0.11461170017719269, -0.08894187957048416, -0.5281318426132202, 0.2695286273956299, -0.48429375886917114, 0.12195158004760742, 0.5131124258041382, 0.4534478783607483, -0.0667039081454277, -0.5285449624061584, 0.6974906921386719, 0.008777857758104801, -0.03859514370560646, -0.3070058524608612, 0.34316644072532654, -0.15219739079475403, 
-0.32142704725265503, 0.0954350009560585, -0.22434651851654053, 0.14385780692100525, -0.21850138902664185, -0.11725130677223206, -0.059350285679101944, -0.24424023926258087, -0.04602689668536186, -0.3225405514240265, 0.4375191628932953, -0.3199043571949005, -0.18890120089054108, -0.4710024297237396, -0.04060674086213112, 0.024486931040883064, 0.3205762505531311, -0.24843166768550873, -0.5017600059509277, 0.07288262248039246, -0.041747234761714935, 0.18734951317310333, -0.11040210723876953, 0.271292507648468, 0.11172766983509064, 0.19365495443344116, -0.5833194851875305, 0.12485315650701523, 0.45634689927101135, 0.14785325527191162, 0.15634524822235107, -0.008856680244207382, 0.4417518973350525, 0.08077450096607208, 0.2435491681098938, -0.0768263190984726, 0.057480815798044205, -0.00968922022730112, -0.37191158533096313, 0.2248469889163971, -0.4911949932575226, -0.591479480266571, 0.3513548672199249, 0.3757001459598541, -0.16081276535987854, 0.29091116786003113, 0.08325716108083725, 0.4656810760498047, -0.4958566427230835, 0.4533582627773285, 0.27564340829849243, 0.7593759298324585, 0.07624215632677078, 0.4195525646209717, -0.2508835792541504, -0.05889710783958435, 0.08678177744150162, -0.8607399463653564, 0.3115180730819702, -0.042606864124536514, -0.3144640326499939, 0.028012670576572418, 0.07243292033672333, 0.7682197093963623, -0.039517585188150406, -0.25213298201560974, 0.3467513620853424, 0.4486011862754822, -0.07269575446844101, 0.17331579327583313, -0.034893643110990524, 0.09847401827573776, -0.06897605210542679, -0.30652177333831787, -0.32291531562805176, -0.07145979255437851, -0.584063708782196, 0.030389325693249702, 0.038704704493284225, 0.18834373354911804, -0.6322062015533447, 0.21160808205604553, 0.4160415530204773, -0.006191964261233807, 0.09032242000102997, -0.03947685658931732, -0.452836811542511, -0.40188324451446533, 0.010284855030477047, 0.026512861251831055, 0.2312709093093872, -0.09735062718391418, 0.30709660053253174, 0.03855298459529877, 
0.008555624634027481, 0.23410114645957947, 0.0026847838889807463, 0.3518255949020386, -0.452597439289093, 0.13440372049808502, 0.011643178761005402, 0.40787193179130554, 0.04566749185323715, 0.568187415599823, 0.20815861225128174, 0.36264026165008545, -0.11538737267255783, 0.05224110558629036, 0.14097385108470917, 0.4399794638156891, -0.26285117864608765, 0.016280755400657654, -0.14294356107711792, 0.3125947117805481, -0.7263769507408142, -23.562105178833008, 0.07232223451137543, 0.12058894336223602, -0.24259476363658905, -0.24153022468090057, -0.2465103268623352, 0.07447182387113571, -0.4736485183238983, 0.019782382994890213, -0.30159926414489746, 0.2056305855512619, 0.20705543458461761, -0.5485680103302002, -0.016984371468424797, -0.2290676087141037, 0.03788358345627785, -0.4012826681137085, -0.2175520956516266, -0.2490825653076172, -0.05547937750816345, 0.18503136932849884, 0.011996548622846603, -0.011003195308148861, 0.061115723103284836, 0.15419155359268188, 0.020927423611283302, 0.018214980140328407, -0.2253739982843399, 0.11551165580749512, 0.14088301360607147, 0.17540273070335388, -0.43976539373397827, 0.19704438745975494, 0.2831715941429138, -0.13270294666290283, 0.02360602095723152, 0.18290765583515167, -0.17383059859275818, 0.31943279504776, 0.21505539119243622, 0.22126275300979614, 0.06090717390179634, 0.2773906886577606, 0.215247243642807, 0.06176243722438812, 0.19518397748470306, 0.39365479350090027, -0.08391376584768295, -0.5486515164375305, 0.21785442531108856, -0.13965857028961182, 0.1823107749223709, -0.4035900831222534, -0.21488280594348907, -0.14461641013622284, 0.20419052243232727, -0.11934028565883636, 0.5670527815818787, 0.467253178358078, -0.3171699643135071, -0.5442911982536316, 0.18844249844551086, -0.09466484934091568, -0.1334865242242813, -0.4140351414680481, 0.5041291117668152, -0.32191163301467896, 0.31260502338409424, -0.2993837893009186, 0.24947603046894073, -0.3419414162635803, 0.17721326649188995, 0.2796716094017029, 
-0.4912639260292053, 0.35796818137168884, -0.004382845014333725, -0.05421530082821846, 0.30419695377349854, 0.5484389662742615, 0.47720950841903687, -0.5905007719993591, 0.0682842880487442, 0.02803119830787182, 0.018343310803174973, 0.12901723384857178, 0.20203140377998352, -0.022248711436986923, 0.2385154515504837, -0.30127468705177307, 0.07961389422416687, 0.15617340803146362, -0.11384782195091248, -0.2841457724571228, 0.40425869822502136, -0.19032540917396545, 0.04966805875301361, -0.7219070196151733, -0.021756134927272797, -0.3077150583267212, -0.025631548836827278, -0.009708631783723831, 0.5553847551345825, 0.3045938014984131, -0.05677582323551178, 0.0011177670676261187, -0.3601633310317993, 0.14254455268383026, -0.6158615350723267, -0.013998099602758884, -0.1687718629837036, -0.18255765736103058, 0.02296719141304493, -0.23310595750808716, 0.09295377135276794, 0.205294668674469, 0.3027175962924957, 0.42656826972961426, 0.11052682995796204, -0.09267906844615936, -0.32875382900238037, -0.6008241176605225, -0.05126196891069412, 0.37488096952438354, 0.5632932782173157, -0.3892771005630493, 0.19639846682548523, -0.41320332884788513, 0.2453674077987671, -0.291326642036438, 0.1031276062130928, 0.3749399483203888, -0.16911406815052032, -0.30229806900024414, 0.07198256254196167, -0.0522468239068985, 0.17552703619003296, -0.2563469409942627, 0.07928317785263062, -0.0884820744395256, 0.06754527240991592, -1.0060642957687378, 0.1795281171798706, 0.5024130940437317, 0.17666001617908478, -0.17627578973770142, -0.3200725317001343, 0.44574809074401855, 0.0002456159854773432, 0.1740991175174713, 0.28717318177223206, -0.4492674171924591, 0.28361067175865173, 0.34079205989837646, 0.028405047953128815, 0.09629912674427032, -0.6398774981498718, -0.0800582692027092, 0.08601474016904831, -0.25504374504089355, -0.39174434542655945, 0.20999091863632202, -0.16717344522476196, -0.07549077272415161, 0.023406360298395157, 0.16171784698963165, 0.01263616606593132, -0.18483112752437592, 
-0.21254490315914154, 0.3325229287147522, 0.032773420214653015, 0.42506998777389526, 0.17967943847179413, -0.11466490477323532, -0.5975770354270935, 0.30726510286331177, 0.06726855039596558, -0.10518888384103775, 0.12202140688896179, 0.05665508657693863, 0.1577533483505249, 0.09999841451644897, 0.3666403293609619, -0.47229886054992676, -0.6036521792411804, -0.0789918527007103, 0.08080124855041504, -0.3888062536716461, 0.2141568809747696, -0.17826803028583527, 0.2614773213863373, 0.5134115815162659, 0.4959062337875366, 0.19277292490005493, 0.03063686564564705, 0.015804477035999298, 0.2656279504299164, -0.07110751420259476, 0.06423328816890717, -0.24588046967983246, -0.037049856036901474, -0.0007797208381816745, 0.0888325497508049, -0.09523191303014755, -0.23489998281002045, -0.6709890961647034, -0.3208419680595398, -0.09623111039400101, -0.09146011620759964, 0.23692503571510315, -0.13624311983585358, 0.297378808259964]} 2 | {"_op_type": "index", "_index": "researchgate", "title": "Database Similarity Searches", "abstract": "With genome sequencing projects producing huge amounts of sequence data, database sequence similarity search has become a central tool in bioinformatics to identify potentially homologous sequences. It is thus widely used as an initial step for sequence characterization and annotation, phylogeny, genomics, transcriptomics, and proteomics studies. Database similarity search is based upon sequence alignment methods also used in pairwise sequence comparison. Sequence alignment can be global (whole sequence alignment) or local (partial sequence alignment) and there are algorithms to find the optimal alignment given particular comparison criteria. However, as database searches require the comparison of the query sequence with every single sequence in the database, heuristic algorithms have been designed to reduce the time required to build an alignment that has a reasonable chance to be the best one. 
Such algorithms have been implemented as fast and efficient programs (Blast, FastA) available in different types to address different kinds of problems. After searching the appropriate database, similarity search programs produce a list of similar sequences and local alignments. These results should be carefully examined before coming to any conclusion, as many traps await the similarity seeker: paralogues, multidomain proteins, pseudogenes, etc. This chapter presents points that should always be kept in mind when performing database similarity searches for various goals. It ends with a practical example of sequence characterization from a single protein database search using Blast.", "abstract_vector": [0.6226556897163391, 0.09961648285388947, -0.12058653682470322, 0.1826859712600708, -0.24145302176475525, -0.3421052098274231, 0.3430440127849579, 0.34688639640808105, 0.04302772507071495, 0.1151508167386055, -0.20650604367256165, 0.05808325111865997, -0.3515530824661255, 0.23618662357330322, -0.38621261715888977, -0.48019301891326904, -0.004761682357639074, -0.05274973064661026, -0.04413403570652008, -0.3457265794277191, 0.02595214545726776, -0.8479899764060974, 0.3203590512275696, -0.4724694490432739, 0.016422849148511887, -0.7381826639175415, 0.23498785495758057, 0.05234389379620552, 0.07182420045137405, 0.24970164895057678, -0.12769284844398499, 0.19202035665512085, -0.2603401243686676, 0.31151387095451355, -0.1416594684123993, 0.2942935526371002, -0.27914249897003174, 0.004859625361859798, -0.566402018070221, 0.06844025105237961, -0.12722383439540863, 0.39932724833488464, -0.2914681136608124, 0.4430195987224579, 0.14990317821502686, -0.1419685333967209, 0.10702373832464218, 0.3243202865123749, -0.17197109758853912, -0.21981047093868256, 0.2347850203514099, 0.17075727880001068, -0.37131214141845703, 0.056387241929769516, 0.3745990991592407, -0.2794847786426544, -0.2491092085838318, 0.04518142342567444, -0.18952307105064392, -0.0054038153029978275, 
-0.03953799605369568, 0.18001633882522583, 0.0145537368953228, -0.12118571996688843, 0.04155693203210831, -0.08937131613492966, -0.6673262715339661, 0.05165587365627289, 0.11960878223180771, -0.460628479719162, -0.26676467061042786, -0.32555991411209106, -0.016055351123213768, 0.14673550426959991, 0.08801324665546417, 0.26348015666007996, 0.013359651900827885, -0.31546393036842346, -0.4683157205581665, -1.1695828437805176, 0.3702298104763031, 0.04847753420472145, 0.34832268953323364, 0.20812204480171204, -0.010198641568422318, 0.02137666940689087, -0.091184101998806, 0.3103959560394287, 0.6057144403457642, 0.7741560339927673, 0.19948838651180267, 0.09966277331113815, 0.3603527843952179, -0.07988068461418152, 0.006318616680800915, -0.29329201579093933, -0.305205374956131, 0.012198195792734623, -0.3791583776473999, 0.3465432822704315, 0.8177602291107178, -0.04625996947288513, -0.20253092050552368, 0.23915451765060425, 0.5175754427909851, 0.23137521743774414, 0.3398250639438629, -0.3843754231929779, -0.5300629734992981, -0.19174641370773315, 0.6430792212486267, -0.6088686585426331, 0.17254741489887238, 0.1742861419916153, 0.3169879615306854, -0.2519455552101135, -0.46616610884666443, 0.046482376754283905, -0.29777583479881287, -0.1734389364719391, 0.47026726603507996, 0.2915225923061371, 0.21620191633701324, -0.44726988673210144, 0.11989255249500275, 0.10355689376592636, -0.1751454770565033, 0.10539813339710236, 0.16467911005020142, 0.10242190212011337, -0.5877955555915833, -0.34941768646240234, -0.022603044286370277, -0.16958087682724, 0.018666712567210197, -0.18843534588813782, -0.46291857957839966, 0.2980251610279083, -0.7236957550048828, -0.44287288188934326, 0.06692802160978317, 0.21435463428497314, -0.007653595879673958, -0.6528238654136658, 0.47653472423553467, -0.35089829564094543, -0.42671847343444824, 0.21535024046897888, -0.6291172504425049, 0.5667794346809387, -0.28395527601242065, 0.20607858896255493, -0.4905197024345398, 0.25206825137138367, 
-0.07780838757753372, -0.25684237480163574, -0.33349087834358215, -0.2035951167345047, 0.48441603779792786, -0.35947489738464355, 0.6607933640480042, 0.004013530444353819, 0.17076778411865234, 0.2871459722518921, 0.24582746624946594, 0.5559915900230408, 0.14978952705860138, 0.004033240955322981, 0.023750819265842438, -0.006055976264178753, 0.5291673541069031, -0.0011055293725803494, -0.19080796837806702, 0.2739335596561432, 0.613387405872345, 0.23135453462600708, 0.08300264179706573, -0.36494526267051697, -0.1936461329460144, -0.1527498960494995, 0.18915210664272308, -0.36122557520866394, -0.2021951675415039, -0.5452274680137634, 0.12104292958974838, 0.1877843290567398, 0.219132199883461, -0.4922577142715454, 0.26131436228752136, 0.1307794749736786, 0.1697513908147812, 0.17411573231220245, -0.11151430010795593, 0.1811780482530594, -0.34583622217178345, -0.12383053451776505, 0.2024925798177719, 0.5673309564590454, -0.03045187145471573, 0.2513156831264496, 0.019969437271356583, 0.09820301830768585, 0.16209550201892853, 0.8300991654396057, 0.10240921378135681, 0.030234267935156822, -0.5756186246871948, 0.09042583405971527, 0.30166366696357727, -0.21445049345493317, 0.1343318521976471, -0.6230555176734924, -0.46378016471862793, -0.052259668707847595, -0.16226083040237427, -0.03869592398405075, -0.2689667046070099, -0.15126387774944305, -0.04197950288653374, 0.06774387508630753, 0.1252439171075821, 0.08000126481056213, 0.5064423084259033, -0.8762838840484619, -0.3734872043132782, 0.22847698628902435, 0.051374588161706924, -0.0775066390633583, 0.645602285861969, -0.0350097194314003, 0.24482452869415283, 0.7006339430809021, 0.015525604598224163, 0.17281723022460938, 0.1910427212715149, -0.5221055746078491, -0.3408660590648651, -0.33507710695266724, -0.38728928565979004, 0.23635391891002655, 0.07098705321550369, 0.38925111293792725, -0.2251330018043518, 0.05092087760567665, 0.022884393110871315, 0.3795405328273773, 0.4312158226966858, 0.6474952697753906, 
-0.11984702199697495, -0.4275538921356201, -0.19791260361671448, -0.3548204302787781, 0.5048063397407532, 0.00536501407623291, 0.0036323440726846457, 0.16214951872825623, -0.04783143475651741, -0.10047870874404907, 0.40577369928359985, 0.1789465695619583, 0.2597328722476959, 0.08573779463768005, -0.03601706400513649, -0.08811287581920624, -0.18948392570018768, -0.29692018032073975, -0.6563115119934082, -0.6100348830223083, 0.04352344572544098, -0.06146426498889923, 0.05646659433841705, 0.0008374167955480516, 0.3475836515426636, 0.12093479931354523, 0.8027416467666626, 0.46501749753952026, 0.31942063570022583, -0.3878004550933838, 0.22348174452781677, -0.5441825985908508, 0.3857606053352356, -0.00029619544511660933, -0.32605671882629395, 0.19935105741024017, -0.5946128964424133, 0.08953322470188141, 0.07167873531579971, 0.30741190910339355, 0.08741141110658646, -0.1865239292383194, -0.36220574378967285, 0.3729386627674103, 0.21545635163784027, 0.04079441726207733, 0.11145418137311935, -0.09902448952198029, -0.5991837978363037, -0.243251234292984, -0.30519944429397583, 0.24044370651245117, 0.16937094926834106, -0.059643082320690155, -0.31886163353919983, -0.021161889657378197, 0.19331412017345428, -0.4097829759120941, 0.3299425542354584, -0.021351736038923264, 0.016166746616363525, -0.07916602492332458, 0.3757713735103607, 0.33575737476348877, -0.22475861012935638, 0.49513065814971924, -0.29813989996910095, -0.6154378652572632, 0.07811833918094635, 0.1902419924736023, -0.22445237636566162, 0.16637685894966125, -0.15741394460201263, 0.14554645121097565, -0.6628209948539734, 0.4343264698982239, 0.25492602586746216, 0.19542162120342255, 0.31183505058288574, 0.21657925844192505, -0.14959058165550232, -0.5754730701446533, -0.2556036114692688, -0.048946212977170944, 0.4891170859336853, -0.06775172799825668, -0.024196051061153412, -0.10524594038724899, 0.06794175505638123, 0.33036312460899353, 0.03739697113633156, -0.2868344187736511, 0.2519930303096771, 
0.47391849756240845, -0.15314935147762299, 0.03284415975213051, 0.03794805333018303, 0.7292891144752502, 0.06844139844179153, -0.04914752021431923, -0.28636881709098816, -0.15032535791397095, -0.33169662952423096, -0.9907318949699402, -0.18691594898700714, 0.1665826290845871, 1.2082520723342896, 0.2450878918170929, -0.1353682279586792, -0.15890012681484222, 0.5877320766448975, 0.278730571269989, -0.16434460878372192, -0.15676884353160858, -0.15092287957668304, -0.17574283480644226, 0.34440910816192627, -0.6310241222381592, -0.23579299449920654, 0.8909671902656555, 0.2822136878967285, -0.3191840648651123, 0.4103487730026245, 0.12797139585018158, 0.19140911102294922, -0.05741773173213005, 0.006261829286813736, -0.42388516664505005, -0.3175587058067322, -0.049432553350925446, 0.6209027767181396, -0.28938961029052734, 0.3399259150028229, 0.3747566342353821, 0.16322654485702515, -0.08442943543195724, -0.03054584003984928, -0.06455302983522415, -0.041352443397045135, -0.5661457180976868, -0.21187534928321838, -0.07850062102079391, 0.18708528578281403, 0.03364193066954613, 0.30299922823905945, 0.05891430377960205, -0.15645302832126617, -0.23497706651687622, -0.49436429142951965, 0.3018699884414673, 0.2009035348892212, -0.027165643870830536, -0.11693208664655685, 0.43205273151397705, 0.22390247881412506, -0.5594696402549744, -0.48172733187675476, -0.6229496002197266, -1.0226062536239624, 0.0605507418513298, -0.5344454646110535, 0.5642105340957642, -0.32186710834503174, 0.02762257494032383, -0.09276353567838669, 0.14895877242088318, 0.9966467022895813, 0.02485792711377144, -0.21514347195625305, -0.2967562973499298, 0.15782783925533295, 0.14597997069358826, -0.2650006413459778, 0.05435197800397873, 0.20109504461288452, -0.12097354233264923, -0.915574848651886, 0.12689995765686035, -0.34235355257987976, -0.07569005340337753, -0.318150132894516, -0.1297709345817566, -0.29798322916030884, 0.27882006764411926, 0.06137741729617119, 0.5556517243385315, -0.33536431193351746, 
-0.17056192457675934, 0.443170964717865, 0.20388780534267426, 0.23434096574783325, -0.11172358691692352, 0.21463024616241455, 0.15267609059810638, 0.10410679876804352, -0.2191920131444931, -0.039481475949287415, 0.2273084968328476, -0.12027551978826523, -0.21292950212955475, -0.03810150921344757, -0.7228077054023743, -0.0021589866373687983, -0.4473905563354492, 0.20439966022968292, -0.27486369013786316, 0.0017631971277296543, -0.7218115925788879, 0.039234355092048645, 0.16710889339447021, -0.40866824984550476, 0.12018075585365295, -0.553641140460968, -0.27149099111557007, 0.25121089816093445, 0.35252830386161804, -0.09853595495223999, 0.1963946372270584, 0.09752294421195984, -0.05077627673745155, -1.1566503047943115, 0.1305346041917801, 0.5373185873031616, 0.2605697512626648, -0.061466705054044724, 0.11752230674028397, 0.39238518476486206, 0.14067862927913666, -0.0003686982672661543, 0.278530478477478, -0.23994463682174683, 0.08051224797964096, -0.2788076400756836, 0.6111660003662109, -0.4638975262641907, -0.32898271083831787, 0.6155139803886414, 0.47597068548202515, 0.34506189823150635, 0.17505525052547455, 0.049069419503211975, 0.3810518980026245, -0.3768891394138336, 0.35948875546455383, 0.2593490183353424, 0.7180517315864563, -0.40549713373184204, -0.2141779065132141, -0.3775516450405121, 0.2736101448535919, -0.07232803851366043, -0.4218396842479706, 0.11982837319374084, -0.026126885786652565, -0.4260369539260864, 0.2577444016933441, -0.03484318405389786, 0.5282642841339111, 0.013660273514688015, -0.20855407416820526, 0.089813731610775, 0.6698712110519409, 0.06082066893577576, 0.18454352021217346, 0.1231154128909111, -0.07512649893760681, 0.2304786741733551, -0.12449608743190765, 0.1062350943684578, -0.28894633054733276, -0.3661545515060425, 0.5333379507064819, -0.07069393247365952, 0.2792796790599823, -0.11637214571237564, 0.04904777184128761, 0.39507752656936646, -0.022823398932814598, 0.9726905226707458, 0.03074829652905464, -0.4621685743331909, 
-0.48394161462783813, 0.09083256125450134, -0.10454654693603516, 0.47869792580604553, -0.08341295272111893, 0.4592619836330414, 0.19512906670570374, 0.08097121119499207, 0.15983375906944275, -0.03962540999054909, 0.36897218227386475, -0.2825780510902405, -0.31030693650245667, 0.08715378493070602, 0.351490318775177, -0.20508474111557007, 0.6757425665855408, 0.14830993115901947, 0.3646460473537445, 0.11050129681825638, 0.372984915971756, 0.16389337182044983, 0.08825217187404633, 0.1323365718126297, 0.19744114577770233, 0.11053811013698578, 0.621442973613739, -0.2364269495010376, -23.829755783081055, 0.08666212111711502, -0.15650317072868347, -0.14289885759353638, -0.2706034779548645, -0.09606576710939407, 0.09843501448631287, -0.31947070360183716, 0.01347278617322445, -0.44276225566864014, 0.39308127760887146, 0.46003684401512146, 0.1273687481880188, -0.3010328412055969, -0.5122658014297485, 0.5197736620903015, -0.4489917755126953, -0.18697193264961243, -0.24316094815731049, 0.1417142152786255, 0.01288884598761797, 0.07849321514368057, -0.21144035458564758, 0.33044910430908203, 0.07741712033748627, 0.29412198066711426, 0.42093145847320557, -0.04885059967637062, -0.3698389530181885, 0.06269912421703339, 0.2756023705005646, 0.13514591753482819, 0.21618148684501648, -0.03262291103601456, -0.1642731875181198, -0.17272283136844635, 0.26706382632255554, -0.5029526948928833, 0.5015693306922913, 0.07236713171005249, -0.16223885118961334, -0.10122359544038773, 0.2509395480155945, -0.2430381178855896, 0.1652960330247879, 0.340739905834198, 0.20525255799293518, -0.27654320001602173, -0.32929861545562744, 0.028722025454044342, -0.09528755396604538, 0.12873849272727966, -0.5826712846755981, 0.17527131736278534, -0.389669269323349, 0.1007787212729454, -0.3387778103351593, 0.9837486147880554, 0.49441367387771606, -0.4206142723560333, -0.45065784454345703, 0.12508369982242584, -0.10592459887266159, -0.1373448520898819, -0.2996816039085388, 0.5064613223075867, -0.1431742161512375, 
-0.046559542417526245, -0.2884224057197571, 0.29500940442085266, -0.26733726263046265, 0.10747890174388885, 0.3197422921657562, -0.6619466543197632, -0.009831543080508709, -0.7428438663482666, 0.09246880561113358, 0.14654140174388885, 0.47629714012145996, 0.1723531186580658, -0.554461658000946, -0.045926693826913834, 0.27345994114875793, -0.6793563961982727, 0.13927116990089417, 0.09996172040700912, 0.03893178328871727, 0.380619078874588, -0.04205505922436714, 0.4304962158203125, 0.285930335521698, 0.25231918692588806, 0.13775259256362915, 0.18752525746822357, -0.2799653708934784, -0.02383795939385891, -0.30867239832878113, -0.3453749418258667, -0.1735231727361679, -0.28353169560432434, -0.6282945275306702, 0.8063986897468567, 0.6370394825935364, -0.1668599247932434, 0.15203270316123962, -0.06151775270700455, 0.48668891191482544, -0.4744527041912079, 0.023401055485010147, -0.06938554346561432, 0.06482819467782974, -0.2534409463405609, -0.5030328631401062, -0.06526821851730347, 0.4088224768638611, 0.07689706981182098, 0.26525866985321045, -0.2645213007926941, -0.19626033306121826, -0.5948385000228882, -0.786782443523407, -0.35060223937034607, 0.14245228469371796, 0.37126532196998596, -0.2865426242351532, -0.0505538284778595, -0.40022096037864685, 0.3204311728477478, -0.12215597182512283, 0.14877434074878693, 0.18860627710819244, -0.7316367626190186, -0.27561184763908386, 0.0922660082578659, -0.14527204632759094, -0.07816242426633835, -0.2796480655670166, 0.011181595735251904, 0.0297376848757267, -0.463072806596756, -0.7530686259269714, 0.30214428901672363, 0.1760062277317047, 0.08842778950929642, 0.046154748648405075, -0.3030611276626587, 0.21903002262115479, 0.5375044345855713, 0.8656588196754456, 0.240189328789711, -0.23640859127044678, 0.5128482580184937, 0.524614155292511, 0.052721891552209854, 0.43021315336227417, -0.5525885224342346, 0.13272389769554138, -0.15615853667259216, -0.22359997034072876, -0.2559525966644287, 0.08485009521245956, -0.321031391620636, 
-0.2611462473869324, 0.13133364915847778, 0.21457728743553162, 0.16186614334583282, -0.3032201826572418, -0.41782769560813904, 0.1777183711528778, -0.0812639594078064, 0.21362486481666565, 0.43130168318748474, 0.060730546712875366, -0.6843588948249817, -0.2361403852701187, 0.07391799241304398, 0.7541681528091431, 0.13842283189296722, 0.35309234261512756, 0.2914980947971344, 0.15916381776332855, 0.40831270813941956, -0.6029332280158997, -0.06464341282844543, -0.23687726259231567, -0.04266757518053055, -0.17207284271717072, 0.3261251449584961, -0.14575734734535217, 0.07455683499574661, 1.1328281164169312, -0.009125388227403164, 0.07301627099514008, 0.11064283549785614, 0.09397797286510468, 0.17475473880767822, 0.4179529845714569, 0.0023795864544808865, -0.16576503217220306, -0.04402098059654236, -0.6096538305282593, -0.11943960189819336, 0.4268716871738434, -0.26862236857414246, -0.48984968662261963, 0.08989930897951126, -0.08143720775842667, -0.10820133984088898, -0.07442264258861542, 0.25803473591804504, 0.3286072015762329]} 3 | {"_op_type": "index", "_index": "researchgate", "title": "Application of Improved SAX Algorithm to QAR Flight Data", "abstract": "During describing, storing and retrieving such operations on QAR flight data, traditional SAX can\u2019t overcome time series amplitude flex and timeline drift, so improved algorithm is proposed. QAR flight data will be divided into three stages and use algorithms to fill the cruise stage, thus allowing effective search for time series of different length. The experiment and item prove the feasibility and effectiveness. 
It increased greatly the efficiency of aircraft troubleshooting.", "abstract_vector": [0.19036197662353516, 0.06593388319015503, 0.0689990222454071, -0.10977751016616821, -0.45738306641578674, -0.6507702469825745, 0.18313027918338776, 0.12364189326763153, -0.049605078995227814, -0.07144094258546829, -0.24542345106601715, 0.4623880386352539, 0.44182682037353516, 0.35771578550338745, -0.1288900524377823, -0.1529480218887329, 0.31412437558174133, -0.17450939118862152, -0.48374977707862854, 0.06557685881853104, 0.013967079110443592, -0.46875137090682983, 0.27349135279655457, -0.5597888827323914, 0.3296395242214203, -0.6497237682342529, -0.03701792284846306, 0.01854434423148632, 0.4713990092277527, 0.040252942591905594, -0.05874936282634735, 0.09538598358631134, 0.2600201964378357, 0.0892898440361023, -0.018282147124409676, 0.11267417669296265, -0.17127104103565216, 0.07577015459537506, -0.26984748244285583, -0.09836966544389725, 0.6313126683235168, 0.025185368955135345, -0.40349799394607544, 0.2390177845954895, 0.2983209490776062, -0.06774720549583435, 0.33048561215400696, 0.6995428204536438, 0.3441479802131653, 0.2032797932624817, 0.4924653172492981, 0.1911630779504776, -0.25499582290649414, 0.2502484619617462, -0.20511704683303833, -0.2495761513710022, 0.211065411567688, 0.2729021906852722, 0.17759275436401367, 0.19953562319278717, -0.23672831058502197, 0.16335512697696686, 0.3095364570617676, -0.16298997402191162, 0.021564476191997528, -0.10934144258499146, -0.48333680629730225, 0.3926968276500702, 0.1893947422504425, -0.507612943649292, -0.20847007632255554, -0.3396516442298889, -0.27172794938087463, 0.002180791925638914, -0.31311991810798645, 0.478005975484848, 0.10703802853822708, -0.379342645406723, -0.3337811529636383, -0.5029182434082031, 0.20971646904945374, -0.07525279372930527, 0.20908893644809723, 0.365360289812088, 0.06344153732061386, 0.002737605245783925, 0.033292923122644424, 0.18580633401870728, 0.21947254240512848, 0.23504917323589325, 
0.46321383118629456, -0.06145993620157242, 0.5663089156150818, -0.06105712056159973, -0.3049006760120392, -0.07843119651079178, 0.02396249957382679, -0.37098976969718933, -0.3381241261959076, 0.043921198695898056, 0.2626088261604309, 0.028912009671330452, -0.389459490776062, 0.061776064336299896, 0.23246370255947113, 0.0635218620300293, 0.050043985247612, -0.48806095123291016, -0.3096703588962555, 0.022072775289416313, 0.2940432131290436, -0.29183536767959595, 0.3968076705932617, -0.10177548229694366, 0.5041964054107666, -0.26909196376800537, 0.29118087887763977, 0.21161337196826935, -0.20317712426185608, -0.054455146193504333, 0.11583155393600464, -0.16498765349388123, 0.1751900166273117, -0.12904897332191467, 0.4063189625740051, -0.5223280787467957, 0.334891140460968, -0.023833349347114563, 0.14481373131275177, 0.38319969177246094, -0.31768348813056946, 0.22393669188022614, -0.09541978687047958, 0.0692056342959404, -0.031180210411548615, 0.021867146715521812, -0.44521260261535645, -0.07879724353551865, -0.5871811509132385, -0.26680392026901245, 0.09728915989398956, 0.23745286464691162, -0.037535760551691055, -0.11800482124090195, 0.13766585290431976, -0.37639090418815613, -0.25805649161338806, -0.2552509009838104, -0.11944491416215897, 0.27527257800102234, -0.257106214761734, -0.4255926012992859, -0.3402705788612366, -0.004056941252201796, -0.08812056481838226, -0.15505151450634003, -0.4534599781036377, -0.4733665883541107, 0.502144992351532, 0.08131460845470428, 0.7419416308403015, 0.0019563334062695503, -0.041745785623788834, -0.47451382875442505, 0.05249243602156639, 0.25860336422920227, 0.027167610824108124, -0.2459169179201126, 0.08215945959091187, 0.6068867444992065, 0.8379258513450623, 0.19643884897232056, -0.4457179009914398, 0.3609974682331085, 0.19742856919765472, 0.002097737742587924, 0.5122343301773071, -0.49566319584846497, -0.226986363530159, 0.3736196458339691, -0.268269419670105, -0.18676267564296722, 0.2732686698436737, -0.09375938773155212, 
0.08411672711372375, -0.33964312076568604, -0.31216830015182495, -0.4425325393676758, 0.22789539396762848, 0.2802218198776245, 0.26164016127586365, 0.1590173989534378, -0.13704510033130646, 0.041075922548770905, -0.2763191759586334, 0.20403480529785156, 0.04335873946547508, 0.07998619973659515, 0.1398392617702484, -0.26597994565963745, -0.07464698702096939, 0.11222944408655167, 0.09199085831642151, 0.46192851662635803, -0.0973876565694809, -0.06067261844873428, -0.3048851490020752, 0.009208339266479015, 0.1739608645439148, -0.38250359892845154, 0.23016008734703064, -0.865868330001831, -0.2909621298313141, -0.11382748931646347, -0.04095742106437683, 0.0960162803530693, -0.44712555408477783, -0.006302847992628813, -0.1085515022277832, -0.2520672082901001, -0.10695812106132507, 0.0805547833442688, 0.40026381611824036, -0.2500016689300537, -0.19345037639141083, -0.14116336405277252, -0.25642117857933044, -0.35438141226768494, 0.27922165393829346, -0.5349897742271423, 0.1438627690076828, 0.6028150320053101, 0.05156876891851425, 0.3148089647293091, -0.10641355812549591, -0.5321597456932068, 0.009402516297996044, -0.46693122386932373, -0.2907487750053406, 0.17708171904087067, 0.010473464615643024, -0.058479636907577515, 0.2476390302181244, 0.34420689940452576, 0.0016361820744350553, 0.6426964402198792, 0.029247436672449112, 0.4629329442977905, -0.08163304626941681, -0.4241258203983307, 0.35910454392433167, 0.40303662419319153, -0.39035895466804504, -0.1669696867465973, 0.0999264270067215, 0.6122949719429016, 0.20436319708824158, -0.20172350108623505, 0.41824984550476074, -0.7453100681304932, 0.6506096124649048, 0.010162007994949818, 0.0037606190890073776, -0.2355133444070816, 0.035307273268699646, -0.6231754422187805, -0.12471798807382584, 0.06004215404391289, -0.4633868932723999, 0.0003847786283586174, 0.2053712010383606, 0.03310017287731171, 0.07144369184970856, 0.17236022651195526, 0.6661660671234131, 0.44039708375930786, 0.068496473133564, 0.15769505500793457, 
-0.2420600950717926, 0.03875412791967392, 0.5119216442108154, 0.05186180770397186, -0.17808309197425842, 0.22675594687461853, -0.7918970584869385, 0.19583579897880554, -0.016001876443624496, -0.3193027079105377, 0.08128152042627335, -0.472696989774704, -0.40799781680107117, 0.5496423244476318, 0.24843420088291168, 0.0419158898293972, 0.3233056664466858, 0.18258178234100342, -0.09102786332368851, 0.1781141459941864, -0.22022132575511932, 0.30615538358688354, -0.3216869831085205, 0.13408872485160828, 0.2999601662158966, -0.1009155660867691, 0.3030585050582886, -0.426666259765625, -0.16982698440551758, 0.2374296933412552, 0.14352886378765106, 0.22237274050712585, 0.5312142372131348, 0.0303448848426342, 0.006367310881614685, -0.1464533656835556, -0.41875022649765015, -0.024572201073169708, 0.04247278347611427, 0.10577031970024109, 0.1621803492307663, 0.05731327831745148, 0.2571808695793152, 0.06654832512140274, -0.4058499038219452, 0.12663641571998596, 0.09407911449670792, 0.27960309386253357, -0.30029478669166565, -0.05698326230049133, 0.07841131091117859, -0.3376815915107727, 0.11866797506809235, 0.2731647193431854, 0.012146892957389355, 0.3254600763320923, 0.014374755322933197, 0.26169466972351074, -0.035459037870168686, 0.16584785282611847, -0.5267816781997681, 0.23868313431739807, -0.05620281398296356, 0.29604947566986084, 0.006968922447413206, 0.07339412719011307, 0.6641033887863159, 0.13259786367416382, 0.2920568585395813, 0.3029095232486725, -0.28376504778862, 0.09531088918447495, -0.1470973789691925, -0.38992226123809814, -0.03493296355009079, 0.10825658589601517, 1.2518748044967651, -0.022845132276415825, 0.06306619942188263, 0.03844660148024559, 0.38760608434677124, 0.07629381865262985, -0.06494414806365967, 0.1429966241121292, 0.19155631959438324, -0.060745395720005035, 0.254570335149765, -0.09529628604650497, 0.2121339589357376, 0.23097211122512817, 0.2291516363620758, -0.16649170219898224, 0.45568862557411194, -0.46246108412742615, 0.2268306314945221, 
-0.2783644497394562, 0.19937045872211456, -0.284003883600235, -0.05845199152827263, 0.05330301448702812, 0.33424100279808044, -0.28366318345069885, -0.17939262092113495, 0.2726818323135376, 0.46320638060569763, -0.0008471764740534127, -0.39101991057395935, 0.14036287367343903, -0.16235429048538208, -0.34850406646728516, 0.17278793454170227, 0.15506123006343842, 0.08418349921703339, 0.4856909215450287, 0.19245654344558716, -0.12004914879798889, -0.10373511910438538, -0.0845990851521492, -0.25308406352996826, 0.5661712884902954, -0.2294139713048935, -0.22052820026874542, -0.2815992534160614, 0.45297959446907043, -0.0026927583385258913, -0.3166579306125641, -0.5440244078636169, -0.23238837718963623, -0.38262829184532166, -0.07105564326047897, 0.010799393989145756, 0.3362382650375366, -0.3269253075122833, -0.3406795263290405, 0.35900384187698364, -0.0865231528878212, 0.7506575584411621, -0.3932836949825287, -0.2191423922777176, 0.002046625129878521, -0.047659050673246384, -0.07593998312950134, 0.08352672308683395, 0.14281591773033142, 0.07597321271896362, -0.40172669291496277, -0.3612361550331116, 0.16732017695903778, -0.048913534730672836, -0.109775610268116, -0.6272135972976685, 0.08335976302623749, -0.39911338686943054, -0.27871331572532654, -0.1539115309715271, 0.1312905251979828, -0.07001280784606934, -0.29677727818489075, 0.5888376832008362, -0.12176138162612915, 0.28153195977211, 0.13009142875671387, -0.17148374021053314, 0.23256880044937134, 0.03305079787969589, -0.4477089047431946, 0.17541304230690002, -0.17951740324497223, -0.02254576049745083, 0.21724426746368408, -0.14667877554893494, -0.8892167806625366, -0.05381035804748535, 0.020062115043401718, 0.2269667387008667, 0.005052896682173014, 0.37535911798477173, -0.8644449710845947, -0.1979755461215973, 0.26247909665107727, 0.1266418695449829, -0.24771326780319214, -0.573792040348053, 0.1599118560552597, 0.345540314912796, -0.06756246834993362, -0.05121375992894173, 0.20241104066371918, 0.05637899041175842, 
-0.15628580749034882, -0.5671113133430481, -0.10960956662893295, 0.017373189330101013, 0.2016032338142395, 0.010356349870562553, 0.15144135057926178, 0.6824958324432373, 0.22644180059432983, 0.3038989305496216, 0.4022577702999115, -0.1031394749879837, -0.013291836716234684, -0.14716224372386932, 0.6744145750999451, -0.3813456594944, -0.10057903081178665, 0.4925798773765564, 0.1295691430568695, 0.0989488884806633, 0.08293388038873672, -0.21102026104927063, 0.6371045112609863, -0.19586023688316345, 0.1921815723180771, -0.23110000789165497, 0.6925508379936218, -0.025400901213288307, 0.22827352583408356, -0.03461410105228424, 0.29564154148101807, 0.08289498090744019, -0.1060243621468544, 0.04438425600528717, -0.040792547166347504, -0.12889893352985382, -0.09098376333713531, -0.16546835005283356, 0.5694025754928589, 0.09596510976552963, -0.21783113479614258, -0.02741324156522751, 0.2898938059806824, 0.2717626690864563, -0.07507020980119705, -0.1949966847896576, -0.014661076478660107, -0.1008104681968689, -0.35777994990348816, -0.1719760149717331, 0.31116238236427307, -0.0026305443607270718, 0.22594183683395386, -0.0918605700135231, -0.05193289741873741, -0.3715408742427826, 0.06078095734119415, 0.36300182342529297, -0.15867237746715546, 0.26701003313064575, -0.07478868216276169, -0.2250167429447174, -0.2537497282028198, 0.26650235056877136, 0.2424507588148117, -0.2967791259288788, -0.267713725566864, 0.4668525755405426, -0.0660637840628624, 0.1907901167869568, -0.05632550269365311, -0.12417551130056381, 0.33599159121513367, -0.575025200843811, -0.04363245889544487, 0.12009666115045547, 0.14878860116004944, -0.08747101575136185, 1.2161763906478882, 0.2923894226551056, 0.37670329213142395, -0.3709038496017456, -0.1153305321931839, -0.4019676148891449, 0.30224502086639404, -0.14865250885486603, 0.09964560717344284, 0.17743034660816193, 0.5168175101280212, -0.21788762509822845, -24.32552719116211, -0.47370803356170654, -0.16578403115272522, -0.0648450180888176, 
-0.38238078355789185, 0.008995532058179379, -0.06955989450216293, -0.2959984540939331, 0.3476291298866272, -0.39651039242744446, 0.28471341729164124, -0.04881025850772858, -0.37677666544914246, -0.1967455893754959, -0.3070775270462036, -0.2738986313343048, 0.2090654820203781, -0.4997175633907318, -0.38820403814315796, 0.14716589450836182, 0.2412102073431015, -0.10967109352350235, -0.3497202396392822, 0.15191145241260529, 0.3878905475139618, 0.5608004331588745, 0.07506845891475677, -0.3897548019886017, 0.07243207842111588, 0.4812587797641754, 0.13743354380130768, -0.17806878685951233, -0.045572880655527115, -0.010706495493650436, 0.2010473757982254, -0.1631166785955429, -0.07996924221515656, -0.41381484270095825, 0.27794766426086426, 0.09348364919424057, 0.3274933695793152, -0.25066065788269043, 0.19929872453212738, 0.24201896786689758, -0.2093275636434555, 0.2880539000034332, -0.05467331409454346, 0.035863246768713, 0.07884137332439423, -0.11839862167835236, -0.20375846326351166, 0.001530562643893063, -0.7628015279769897, -0.00533301243558526, -0.10475592315196991, 0.03475997596979141, -0.17143778502941132, 1.0569422245025635, -0.021282382309436798, -0.5324254035949707, -0.17200081050395966, -0.1362752467393875, 0.3379134237766266, -0.056719060987234116, -0.16495293378829956, 0.23684659600257874, -0.23104339838027954, -0.1438669115304947, -0.6107290387153625, 0.16931688785552979, -0.35624855756759644, -0.050450846552848816, 0.36240142583847046, -0.25965219736099243, 0.5335567593574524, -0.5439482927322388, 0.11407886445522308, 0.23239557445049286, 0.08332915604114532, 0.043361373245716095, -0.8754779696464539, -0.1464422643184662, 0.09529590606689453, 0.043852172791957855, -0.07973744720220566, 0.1218366026878357, -0.44212067127227783, 0.11871801316738129, -0.33975750207901, 0.5076271295547485, 0.1653306782245636, 0.06064791604876518, -0.09571035206317902, -0.24003976583480835, 0.188860222697258, -0.10125993937253952, -0.5695949792861938, -0.12424648553133011, 
-0.339628666639328, -0.11424235999584198, 0.18776027858257294, 0.8175471425056458, 0.060031499713659286, 0.16542044281959534, -0.11002890765666962, -0.2767347991466522, 0.4708312153816223, -0.007039129734039307, -0.137653648853302, 0.15626804530620575, -0.11126726120710373, -0.1892085075378418, -0.48882541060447693, 0.1233358159661293, 0.06550878286361694, 0.3874558210372925, -0.1617002636194229, 0.06464795768260956, -0.054110560566186905, -0.3062582015991211, -0.5033833980560303, 0.0027379614766687155, 0.7970425486564636, -0.049308180809020996, 0.0939251258969307, -0.3376869559288025, -0.17851385474205017, 0.5826887488365173, -0.32299840450286865, 0.059901703149080276, 0.07768502086400986, -0.16800445318222046, -0.20954950153827667, -0.2640314996242523, 0.023547997698187828, -0.14492419362068176, -0.18660610914230347, 0.3352097272872925, -0.5700657367706299, 0.3310835659503937, -0.975251317024231, 0.19864477217197418, 0.27352240681648254, -0.11958903074264526, 0.249376580119133, -0.0868348479270935, -0.20820622146129608, 0.2510952949523926, 0.12866947054862976, 0.09799680858850479, 0.27367255091667175, -0.32333600521087646, 0.18943066895008087, 0.026667285710573196, 0.006741042248904705, -0.6955859661102295, 0.09629028290510178, 0.4896852374076843, -0.46374112367630005, -0.24826833605766296, 0.6639472246170044, -0.366624653339386, 0.007570089306682348, -0.02290138229727745, 0.20914819836616516, -0.30228009819984436, 0.13762900233268738, -0.16601072251796722, 0.03833678737282753, 0.2899363040924072, -0.21885372698307037, 0.004731265362352133, -0.03250860422849655, -0.19038084149360657, -0.10379545390605927, 0.19189853966236115, 0.33873942494392395, -0.14636270701885223, 0.48791396617889404, 0.22353030741214752, 0.19136181473731995, 0.10280200839042664, -0.25256240367889404, -0.18200801312923431, -0.10533134639263153, 0.07249490171670914, 0.08862494677305222, 0.2886475622653961, 0.08050546050071716, 0.08813364058732986, 0.2602481544017792, -0.49429985880851746, 
-0.07702478021383286, -0.0007834195857867599, 0.1575484722852707, 0.7321777939796448, 0.022468574345111847, -0.15590019524097443, -0.11782147735357285, -0.17502489686012268, 0.17517060041427612, -0.266200989484787, -0.24323096871376038, -0.40312111377716064, -0.24259702861309052, -0.3249403238296509, 0.06582222133874893, -0.34352171421051025, 0.3391503393650055, -0.3256579637527466, 0.28509998321533203]} 4 | {"_op_type": "index", "_index": "researchgate", "title": "Environmental bio-monitoring with high-throughput sequencing", "abstract": "There is much interest in using high-throughput DNA sequencing methodology to monitor microorganisms, complex plant and animal communities. However, there are experimental and analytical issues to consider before applying a sequencing technology, which was originally developed for genome projects, to ecological projects. Many of these issues have been highlighted by recent microbial studies. Understanding how high-throughput sequencing is best implemented is important for the interpretation of recent results and the success of future applications. Addressing complex biological questions with metagenomics requires the interaction of researchers who bring different skill sets to problem solving. Educators can help by nurturing a collaborative interdisciplinary approach to genome science, which is essential for effective problem solving. Educators are in a position to help students, teachers, the public and policy makers interpret the new knowledge that metagenomics brings. To do this, they need to understand, not only the excitement of the science but also the pitfalls and shortcomings of methodology and research designs. 
We review these issues and some of the research directions that are helping to move the field forward.", "abstract_vector": [0.7663964033126831, 0.04481969028711319, -0.1650785207748413, 0.011732553131878376, -0.36276036500930786, -0.5271050930023193, 0.1487666517496109, 0.1865106225013733, -0.08250944316387177, 0.04703173041343689, 0.14552296698093414, 0.30418315529823303, -0.3223412036895752, 0.22474011778831482, -0.40083980560302734, -0.6254733800888062, -0.14070260524749756, -0.019053971394896507, 0.0939696729183197, -0.41203057765960693, -0.1707942932844162, -0.39735302329063416, -0.004908150061964989, -0.4667351245880127, -0.0390586219727993, -1.012539267539978, -0.2290361374616623, 0.06863784044981003, 0.2168232500553131, 0.2823036313056946, -0.352142333984375, 0.2130599021911621, 0.04166179522871971, 0.23761332035064697, -0.05069520324468613, 0.6061593294143677, -0.15418577194213867, 0.09870639443397522, -0.4424082934856415, 0.016727492213249207, -0.10027044266462326, 0.4799600839614868, -0.5875153541564941, 0.27314814925193787, 0.22541195154190063, -0.42443394660949707, 0.1532210111618042, 0.204213485121727, -0.22929592430591583, -0.3231618404388428, 0.037814557552337646, 0.07184119522571564, -0.38628289103507996, -0.03923952206969261, 0.21516670286655426, -0.26677533984184265, -0.09333761036396027, 0.12399940192699432, -0.038939908146858215, 0.0441008098423481, 0.2416456937789917, -0.053382158279418945, 0.10809163749217987, -0.10891855508089066, 0.07313564419746399, -0.16960479319095612, -0.3092711269855499, 0.1808169037103653, -0.14595118165016174, -0.563800573348999, -0.31605273485183716, -0.4879634976387024, 0.10154543071985245, -0.18217475712299347, -0.09950518608093262, 0.447601318359375, -0.20011089742183685, -0.49159857630729675, -0.326884925365448, -0.7759617567062378, 0.186399444937706, 0.08142649382352829, -0.0652185007929802, 0.26693302392959595, -0.050089031457901, 0.06059694290161133, -0.04519348964095116, 0.3046635091304779, 
0.2643873691558838, 0.4622887074947357, 0.1326596587896347, 0.17708851397037506, 0.20042015612125397, 0.09556876868009567, 0.00680557731539011, -0.3688570261001587, -0.15181966125965118, -0.10881758481264114, -0.3748325705528259, 0.2330271601676941, 0.7945696115493774, -0.08265218883752823, -0.09524659812450409, 0.30316832661628723, 0.3593435287475586, 0.09973400086164474, 0.27374058961868286, -0.33718472719192505, -0.16899335384368896, -0.3674463927745819, 0.3352336585521698, -0.16260391473770142, 0.25306761264801025, 0.22161145508289337, 0.31566035747528076, -0.17112936079502106, -0.23130418360233307, 0.2426881045103073, -0.29595205187797546, -0.46273526549339294, 0.23411217331886292, 0.06157199293375015, 0.11077898740768433, -0.12177851051092148, 0.010585717856884003, 0.01558752078562975, -0.025640161707997322, -0.059116020798683167, 0.274632066488266, 0.2118445187807083, -0.4396178424358368, -0.29100391268730164, 0.2558198571205139, -0.3576001226902008, -0.026536745950579643, -0.5041508078575134, -0.2361040860414505, 0.3103284537792206, -0.7759175896644592, -0.4814240634441376, 0.24266143143177032, -0.02505115047097206, 0.12123087048530579, -0.03114405833184719, 0.14897318184375763, -0.1181487962603569, -0.32437050342559814, -0.11094478517770767, -0.15305233001708984, 0.3299349248409271, -0.36727991700172424, 0.01841294765472412, -0.28200390934944153, 0.13116894662380219, 0.05932322517037392, -0.2525380551815033, -0.4533592164516449, -0.14273934066295624, 0.559302806854248, 0.055042482912540436, 0.8176108598709106, -0.22705277800559998, 0.07291451096534729, 0.048098091036081314, 0.5219201445579529, 0.28603246808052063, 0.15168458223342896, -0.2513127624988556, 0.10632771253585815, 0.2541932761669159, 0.5256456732749939, -0.017639677971601486, -0.44779303669929504, 0.4068180024623871, 0.22652988135814667, -0.01913384534418583, 0.150533065199852, -0.41783714294433594, 0.012582438997924328, -0.14541342854499817, 0.11108599603176117, -0.41605144739151, 
-0.12267663329839706, -0.4658852815628052, 0.011297838762402534, 0.28086695075035095, -0.45632484555244446, -0.48757404088974, 0.19630485773086548, 0.2096371054649353, 0.3782965838909149, 0.21138116717338562, -0.10597914457321167, 0.02392367087304592, -0.19251510500907898, -0.1096508726477623, 0.3244122564792633, 0.09630604088306427, -0.06131615489721298, 0.11286719143390656, 0.03718824312090874, -0.004309963434934616, 0.11791114509105682, 0.42555922269821167, -0.4028955101966858, -0.07429098337888718, -0.45523613691329956, 0.22331055998802185, -0.10658273100852966, -0.423147052526474, -0.17856967449188232, -0.4005957841873169, -0.5432351231575012, -0.0176351610571146, 0.03664989769458771, -0.002382181817665696, -0.06994977593421936, -0.16579380631446838, -0.14956189692020416, -0.01568291150033474, -0.03745923936367035, 0.27353325486183167, 0.5747727751731873, -0.7175341248512268, -0.3966255486011505, -0.02451392635703087, 0.1525379717350006, -0.26109302043914795, 0.5119063258171082, 0.0909222885966301, -0.05924813821911812, 0.748018205165863, 0.08463823050260544, 0.17167331278324127, 0.23774726688861847, -0.29612329602241516, -0.10731074959039688, -0.0003052879183087498, -0.17944982647895813, 0.5420880317687988, 0.10019166767597198, -0.040426015853881836, -0.13499382138252258, 0.08432544022798538, 0.04568551480770111, 0.15995411574840546, 0.31545746326446533, 0.742709755897522, -0.10210463404655457, -0.21995402872562408, 0.022989006713032722, -0.25116056203842163, 0.8392542004585266, -0.10833866149187088, -0.10923579335212708, 0.46815475821495056, 0.17943914234638214, -0.0938076302409172, 0.030697599053382874, 0.10053601115942001, 0.27241143584251404, 0.03570105880498886, 0.08691401779651642, 0.00016868687816895545, -0.020626341924071312, -0.2688244581222534, -0.20321418344974518, -0.22544388473033905, 0.025767488405108452, 0.36727356910705566, 0.3730204701423645, 0.1788012832403183, 0.6381354331970215, 0.13043330609798431, 0.5759032964706421, 0.533464252948761, 
0.1888342797756195, -0.1773713231086731, 0.22184589505195618, -0.2494330257177353, 0.40202832221984863, -0.10824012011289597, -0.27636000514030457, -0.05542067438364029, -0.7025393843650818, -0.343550443649292, 0.39494380354881287, 0.20228202641010284, 0.05767717957496643, -0.08837166428565979, -0.13569700717926025, 0.48201969265937805, 0.40366989374160767, -0.07732390612363815, -0.009899594821035862, -0.04040025174617767, -0.5216374397277832, -0.09722816199064255, -0.1389821618795395, -0.12408744543790817, 0.36749809980392456, -0.2644154131412506, 0.10239838808774948, -0.05567016080021858, 0.2485373616218567, -0.6926348209381104, 0.2774834930896759, 0.12973521649837494, -0.12107876688241959, -0.20870482921600342, 0.6279644966125488, 0.14167827367782593, -0.0584668405354023, 0.3323007822036743, -0.6466537714004517, -0.11618972569704056, -0.17787238955497742, -0.05062638223171234, -0.5353887677192688, 0.58005690574646, 0.1260121762752533, 0.19225646555423737, -0.36843380331993103, 0.18880420923233032, 0.24915635585784912, 0.15861594676971436, 0.1546304076910019, -0.12871338427066803, 0.18898141384124756, -0.6568084955215454, -0.19002126157283783, 0.0026913881301879883, 0.3661579191684723, -0.05343887209892273, -0.019218172878026962, 0.017054665833711624, 0.11232321709394455, 0.17116566002368927, -0.038301870226860046, -0.14845383167266846, 0.06855752319097519, 0.4545389413833618, -0.027458319440484047, -0.0003638008201960474, 0.31851205229759216, 0.6173984408378601, 0.19891642034053802, 0.3691411316394806, -0.4358605146408081, 0.28293389081954956, -0.3429502546787262, -0.6489775776863098, -0.3231638967990875, 0.12504562735557556, 0.9089100360870361, 0.15567851066589355, -0.015130904503166676, -0.08056613802909851, -0.047541528940200806, 0.11559204757213593, 0.1726004183292389, 0.17754250764846802, -0.17384912073612213, -0.32749152183532715, 0.32958903908729553, -0.3938862681388855, 0.46293285489082336, 0.45727550983428955, 0.0650746151804924, 0.08214152604341507, 
0.8980920910835266, -0.20701728761196136, -0.0280876774340868, 0.1897929608821869, 0.10895749926567078, 0.02063624933362007, -0.463064044713974, 0.045462898910045624, 0.4993920624256134, 0.04089194908738136, 0.3412054181098938, 0.1361744999885559, 0.6069399118423462, -0.151450514793396, 0.007609713822603226, 0.08968210965394974, -0.2254098355770111, -0.6730085611343384, -0.15031397342681885, -0.012673566117882729, 0.20594078302383423, 0.45809900760650635, 0.1014915183186531, -0.32107535004615784, 0.11747660487890244, 0.017664194107055664, -0.41973572969436646, -0.051987405866384506, 0.24891826510429382, -0.08149053901433945, -0.12319417297840118, 0.5640538334846497, 0.06268052756786346, -0.19212594628334045, -0.2801172137260437, -0.7598652839660645, -0.7308480739593506, -0.20280063152313232, -0.2683790326118469, 0.1680055409669876, -0.5896601676940918, -0.043765462934970856, -0.3952829837799072, 0.13314171135425568, 1.2533466815948486, 0.13831208646297455, -0.12059636414051056, -0.13551771640777588, 0.0828748345375061, 0.19787991046905518, -0.24861042201519012, 0.10170146822929382, 0.195473313331604, 0.16725406050682068, -0.5096805095672607, 0.17561712861061096, 0.018309803679585457, -0.29783326387405396, -0.3821774423122406, 0.1031126081943512, -0.16069503128528595, 0.4600163400173187, 0.0281543992459774, 0.8683497905731201, -0.44507792592048645, -0.18144360184669495, 0.771272599697113, 0.12722156941890717, 0.013169093988835812, -0.06129293888807297, 0.38022705912590027, 0.3443385362625122, -0.15287601947784424, -0.33235839009284973, 0.04498008266091347, 0.03892466425895691, -0.2597588300704956, -0.22316761314868927, 0.014986519701778889, -0.6829012036323547, 0.023409448564052582, -0.06337610632181168, -0.08331216126680374, -0.41589489579200745, -0.057987961918115616, -0.8414398431777954, -0.0809958204627037, -0.16106365621089935, -0.2990178167819977, -0.16088980436325073, -0.6248951554298401, -0.18796363472938538, 0.0754706934094429, 0.28028830885887146, 
0.026637084782123566, 0.3421093225479126, 0.2593136727809906, 0.16810204088687897, -0.6023728251457214, -0.18793635070323944, 0.3295923173427582, -0.09681511670351028, -0.08130903542041779, 0.19372263550758362, 0.2661222517490387, 0.004713625647127628, 0.07113087922334671, 0.2915855646133423, -0.30629608035087585, -0.28460177779197693, -0.6682390570640564, 0.6441652178764343, -0.16213975846767426, -0.1689072847366333, 0.41650325059890747, 0.20656007528305054, 0.4112269878387451, 0.4877035617828369, -0.08950250595808029, 0.764882504940033, -0.10727085173130035, 0.8159299492835999, -0.3335723876953125, 0.7624181509017944, -0.1696121245622635, 0.3752380311489105, -0.2978610098361969, 0.03207946941256523, 0.1104874536395073, -0.3956119418144226, 0.0651499330997467, -0.16470666229724884, -0.5290653705596924, -0.022188294678926468, -0.08743266761302948, 0.572999894618988, -0.07063199579715729, 0.10353940725326538, 0.23915168642997742, 0.4792330265045166, 0.10678274184465408, -0.05438380688428879, 0.15389949083328247, 0.2788258492946625, 0.17076273262500763, -0.6062144637107849, -0.16919590532779694, -0.15459327399730682, 0.0983126237988472, 0.3236895501613617, -0.07458766549825668, 0.1823810636997223, -0.32407620549201965, -0.04709937050938606, 0.1861359179019928, -0.4189734160900116, 0.510794460773468, -0.055382128804922104, -0.4483998119831085, 0.08733933418989182, 0.12071559578180313, 0.2586846649646759, -0.09476444125175476, -0.26059624552726746, 0.3850177526473999, -0.0975179374217987, -0.15911896526813507, 0.22751428186893463, 0.2336295247077942, 0.39146101474761963, -0.18821841478347778, -0.23190368711948395, 0.24077892303466797, 0.28754884004592896, -0.16481514275074005, 0.3935346305370331, -0.04524362459778786, 0.2520402669906616, 0.19695064425468445, 0.16443318128585815, 0.03160640224814415, -0.2111990600824356, -0.12908828258514404, -0.00029773227288387716, 0.18110555410385132, 0.7622416019439697, -0.11568242311477661, -22.875402450561523, 0.13583484292030334, 
-0.10580679029226303, 0.09653570502996445, -0.343109130859375, -0.06885900348424911, 0.019572870805859566, -0.3912614583969116, 0.0017159351846203208, -0.38112208247184753, 0.4874259829521179, 0.32542815804481506, -0.15436162054538727, -0.17993061244487762, -0.516795814037323, 0.444772332906723, -0.41161033511161804, -0.17851854860782623, -0.27131739258766174, 0.2356998771429062, 0.11531949043273926, -0.12353351712226868, -0.6701112985610962, 0.580638587474823, -0.12444451451301575, 0.3674747049808502, 0.3318359851837158, -0.17697320878505707, -0.13673615455627441, 0.07914067059755325, 0.7188063859939575, 0.08331448584794998, 0.09157633036375046, 0.20472382009029388, -0.24350982904434204, -0.12361833453178406, 0.1267748773097992, -0.3096349537372589, 0.42674461007118225, 0.15924911201000214, 0.0944075882434845, -0.03562699630856514, 0.2679997682571411, -0.03111250139772892, -0.0009868526831269264, 0.10894301533699036, -0.07778171449899673, 0.04272936284542084, -0.27791929244995117, -0.23306035995483398, -0.0752781480550766, -0.018511107191443443, -0.4299067258834839, 0.23079484701156616, -0.19088105857372284, 0.23530873656272888, -0.14979994297027588, 0.7327012419700623, 0.14247305691242218, -0.15858370065689087, -0.13167743384838104, 0.29272961616516113, -0.08187443017959595, -0.23130790889263153, -0.46118298172950745, 0.1370575726032257, -0.15456850826740265, 0.24673256278038025, -0.4817448854446411, 0.2527800500392914, -0.27325472235679626, 0.3557480275630951, 0.3515531122684479, -0.4434742331504822, 0.07870133221149445, -0.6925514340400696, 0.31557604670524597, -0.07367374747991562, 0.07636887580156326, -0.06266651302576065, -0.7610475420951843, -0.24443073570728302, 0.15125121176242828, -0.5786067843437195, -0.448729932308197, 0.015318646095693111, -0.12385954707860947, 0.13407428562641144, 0.20989856123924255, 0.10900304466485977, 0.19084937870502472, 0.0806107148528099, 0.21364064514636993, 0.09191608428955078, -0.2394341230392456, 0.41842541098594666, 
-0.3347131311893463, -0.0031154369935393333, -0.4002193808555603, -0.16132000088691711, -0.24058976769447327, 0.8507474660873413, 0.5771454572677612, 0.013961591757833958, -0.01553199253976345, -0.08483561873435974, 0.6529181599617004, -0.46563756465911865, -0.11241040378808975, 0.0982234925031662, 0.18207497894763947, -0.32312947511672974, -0.24612915515899658, -0.23466435074806213, 0.31164076924324036, -0.4140669107437134, 0.14610251784324646, -0.06238577514886856, -0.07960520684719086, -0.36895668506622314, -0.7119725942611694, -0.203611359000206, 0.10942943394184113, 0.4524650573730469, -0.3015919029712677, 0.1355704367160797, -0.47148895263671875, 0.10330488532781601, 0.28471699357032776, 0.03759327530860901, -0.016262400895357132, -0.3726906180381775, -0.4768540561199188, -0.07301253080368042, -0.22662754356861115, -0.08835355937480927, -0.1456327736377716, -0.24207374453544617, 0.36496105790138245, -0.17391595244407654, -1.0584312677383423, 0.3338273763656616, 0.04173850640654564, 0.06870602816343307, -0.043751515448093414, -0.29047247767448425, -0.41002631187438965, 0.2496984452009201, 0.4215340316295624, -0.1753990352153778, -0.4264443516731262, 0.2787784934043884, 0.2709302008152008, 0.030074529349803925, 0.20826394855976105, -0.2725193202495575, 0.23832757771015167, -0.10702259093523026, -0.2561227083206177, 0.07290633767843246, 0.21239244937896729, -0.2611154615879059, -0.26570120453834534, -0.02486363612115383, -0.004846008028835058, 0.11775627732276917, -0.30454903841018677, 0.12990789115428925, -0.06846444308757782, -0.13617631793022156, 0.36070820689201355, 0.09718894958496094, 0.3089953660964966, -0.3559059500694275, -0.35925671458244324, -0.10083470493555069, 0.5587205290794373, 0.12603627145290375, 0.3904274106025696, 0.39416003227233887, 0.23182950913906097, 0.08337914198637009, -0.2254374474287033, 0.09886789321899414, -0.24126169085502625, 0.11112575232982635, 0.2262369990348816, 0.0713328868150711, 0.044923409819602966, 0.0036878478713333607, 
0.6643993854522705, -0.08366969972848892, 0.02640020288527012, 0.3802068829536438, -0.021017584949731827, 0.13683219254016876, 0.23885993659496307, -0.19641384482383728, -0.38498207926750183, 0.09469975531101227, -0.6500173807144165, 0.14977996051311493, 0.2521587610244751, 0.23444795608520508, -0.051519978791475296, -0.11140663176774979, 0.05347289890050888, 0.0354580394923687, 0.07873336970806122, 0.1762273758649826, 0.3054947555065155]} 5 | {"_op_type": "index", "_index": "researchgate", "title": "The influence of hashed fingerprints density on the machine learning methods performance", "abstract": "Computational techniques have become a vital part of today's drug discovery campaigns. Among a wide range of tools applied in this process, machine learning methods can be distinguished. They are used for instance in virtual screening (VS), where its role is to identify potentially active compounds out of large libraries of structures [1]. In order to enable the application of various learning algorithms in VS tasks, an appropriate representation of molecules is needed. One of the solutions comes from the hashed fingerprints, encoding the information about the structure in a form of a bit string [2]. Both length and density (the percentage of 1's) can be modified during hashed fingerprint generation, which (as it was already proved) influence the similarity searching process [3]. The aim of our study was to examine the impact of such fingerprint density on the performance of machine learning methods. A series of bit strings with different density values and of various lengths was generated by means of the RDKit software [4]. 
They were tested in classification tests of 5-HT 1A ligands, with the use of a set of algorithms (Na\u00efve Bayes, SMO, Ibk, Decorate, Hyper-pipes, J48 and Random Forest), in order to determine an optimal values of the variables for machine learning experiments.", "abstract_vector": [0.33842042088508606, -0.4263402819633484, -0.18693391978740692, 0.07598099112510681, 0.008785979822278023, -0.5820921063423157, 0.3055143654346466, 0.1825140416622162, 0.023452015593647957, 0.31982725858688354, 0.07207503914833069, 0.3149698078632355, -0.11770449578762054, 0.13245025277137756, -0.7228578329086304, -0.5469396710395813, -0.2697228193283081, -0.08482757210731506, 0.19709981977939606, -0.5150023698806763, -0.21408148109912872, -0.4677596986293793, 0.2039192020893097, -0.6048620939254761, -0.3465026021003723, -0.6757450699806213, 0.1444675326347351, -0.24248771369457245, -0.6524584293365479, 0.17844055593013763, -0.12310049682855606, 0.3236161172389984, -0.06344848871231079, 0.2118285894393921, -0.3994600772857666, 0.15231981873512268, 0.0643453449010849, 0.005607855971902609, -0.7491262555122375, -0.26611432433128357, 0.34848523139953613, 0.718029797077179, -0.26301679015159607, -0.08875562995672226, -0.14565825462341309, 0.20097360014915466, -0.16648739576339722, 0.10985060036182404, -0.05584585666656494, -0.2870381474494934, 0.33698540925979614, 0.225315660238266, -0.3184998333454132, -0.2671559453010559, -0.15675488114356995, -0.08218938112258911, -0.2842913568019867, -0.2869257926940918, -0.31631505489349365, 0.1265566200017929, 0.1784043312072754, -0.24358822405338287, 0.2244485467672348, 0.10740306228399277, 0.2241126298904419, -0.005607226863503456, -0.2580968141555786, 0.02458096668124199, 0.5793499946594238, -0.6006152629852295, -0.531960666179657, -0.3124126195907593, 0.2864530086517334, 0.19868862628936768, -0.20726634562015533, -0.21580184996128082, 0.011511428281664848, -0.4316934645175934, -0.4734101891517639, -1.3614360094070435, 0.11678381264209747, 
-0.07108159363269806, 0.13015981018543243, 0.6230457425117493, -0.08668303489685059, -0.009539364837110043, 0.07066776603460312, 0.41886183619499207, 0.5701119899749756, 0.7919387221336365, -0.025261396542191505, 0.22398662567138672, 0.18707697093486786, 0.1432598978281021, 0.08867287635803223, 0.055687665939331055, -0.19845372438430786, 0.01183273270726204, -0.505998432636261, 0.13973718881607056, 0.6379413604736328, -0.1404731720685959, -0.4689222276210785, 0.1436307281255722, 0.14109735190868378, 0.3958543539047241, -0.02122628688812256, -0.7868687510490417, -0.09214575588703156, 0.0008973223157227039, 0.23349210619926453, -0.09383008629083633, 0.5114398002624512, -0.09196262061595917, 0.49873125553131104, -0.19718781113624573, -0.12501667439937592, -0.4730969965457916, -0.1379890888929367, -0.23496676981449127, 0.12262136489152908, -0.09296679496765137, 0.26097968220710754, -0.4206048548221588, -0.2928176820278168, 0.11485206335783005, -0.06155567988753319, -0.292842835187912, 0.35987386107444763, -0.08619917929172516, -0.6296300292015076, -0.3712456524372101, -0.07309211790561676, -0.15734252333641052, -0.06424091011285782, -0.08215257525444031, -0.5244252681732178, 0.36008334159851074, -0.7285128235816956, -0.5666877627372742, -0.10463877767324448, -0.10163950175046921, 0.2072945386171341, -0.6466391086578369, 0.3067852854728699, 0.20634233951568604, -0.3933916985988617, 0.4296724796295166, -0.603803277015686, 0.7891685962677002, -0.022082660347223282, 0.6511677503585815, -0.30070960521698, 0.24502651393413544, 0.02796703390777111, -0.03930799663066864, -0.2965870797634125, -0.25612136721611023, 0.5186445713043213, -0.4632081985473633, 0.480994313955307, 0.1381869614124298, 0.3625669479370117, 0.1545078009366989, 0.4085603654384613, 0.435248464345932, 0.5999117493629456, 0.13910101354122162, 0.3157888650894165, 0.11756481230258942, 0.6299721002578735, -0.027011767029762268, -0.43505364656448364, 0.28860342502593994, 0.18655389547348022, 0.16396453976631165, 
-0.02146025188267231, 0.05924980714917183, 0.15035520493984222, -0.6400336623191833, 0.08626513183116913, -0.0015109293162822723, -0.14487223327159882, -0.4803684949874878, 0.009750248864293098, 0.28292059898376465, -0.26596060395240784, -0.5002309679985046, 0.4066203832626343, 0.611960768699646, 0.5076095461845398, 0.03393973410129547, -0.33954864740371704, -0.019858598709106445, -0.12167415767908096, 0.28939759731292725, 0.15606461465358734, 0.4017041325569153, -0.13008148968219757, 0.08722115308046341, -0.026565177366137505, 0.08433672785758972, -0.2535562813282013, 0.8704733848571777, 0.010543610900640488, 0.08621720969676971, -0.7935497760772705, 0.3473285436630249, -0.263316810131073, -0.28165024518966675, -0.34912943840026855, 0.20784132182598114, -0.4293929934501648, 0.09269046038389206, 0.1112549751996994, 0.08882615715265274, -0.6748408675193787, -0.1350490003824234, 0.2599223256111145, -0.08232129365205765, -0.023224789649248123, 0.36140069365501404, 0.1554057002067566, -0.8245221972465515, 0.048038072884082794, 0.31612157821655273, 0.2596220076084137, 0.03297041729092598, 0.7537334561347961, 0.3364410400390625, 0.40890252590179443, 0.7429632544517517, -0.029574763029813766, 0.21739475429058075, 0.32839688658714294, -0.37938910722732544, -0.3853084146976471, -0.09451902657747269, -0.1600874662399292, 0.3603889048099518, 0.07591842859983444, 0.188155859708786, -0.1689319759607315, 0.015061321668326855, -0.05726100504398346, 0.25248223543167114, 0.198883056640625, 0.49321484565734863, -0.24314701557159424, -0.213767409324646, -0.1771364063024521, -0.4482969343662262, 0.4034635126590729, 0.05852325260639191, -0.20277804136276245, 0.198006272315979, -0.10099396854639053, -0.274432897567749, 0.45428285002708435, 0.373922199010849, 0.46570897102355957, -0.08774549514055252, 0.015017619356513023, 0.10210803896188736, 0.09963410347700119, -0.42837655544281006, -0.26645752787590027, -0.5286855101585388, 0.05497374385595322, 0.29316943883895874, 
0.02794603630900383, 0.0921340361237526, 0.3903372585773468, 0.461671382188797, 0.5175092220306396, 0.6386683583259583, 0.3799774944782257, -0.44236278533935547, -0.021578900516033173, -0.23200179636478424, 0.44062840938568115, -0.09119585901498795, -0.2431894838809967, 0.10013162344694138, -1.1375640630722046, -0.0016615897184237838, -0.30926963686943054, 0.24741706252098083, -0.37876319885253906, -0.09159300476312637, -0.3047162890434265, 0.5996962189674377, -0.013694453984498978, -0.521988570690155, 0.17805097997188568, -0.41123172640800476, -0.7888810634613037, 0.2056845873594284, 0.03818226233124733, 0.34115883708000183, 0.08926960080862045, 0.08801232278347015, -0.16177509725093842, 0.31999391317367554, 0.16550736129283905, -0.6299136877059937, -0.09087856113910675, 0.44305893778800964, 0.30629876255989075, -0.2944416105747223, -0.02554481290280819, 0.344643235206604, -0.5035003423690796, 0.4345569908618927, -0.301309198141098, -0.6188919544219971, 0.018487941473722458, -0.08878292143344879, 0.0972164049744606, -0.18476037681102753, -0.20974236726760864, 0.020190130919218063, -0.8689672946929932, 0.16342943906784058, -0.02325308322906494, 0.11959940940141678, 0.3953283727169037, -0.1440804898738861, 0.12783415615558624, -0.03541354089975357, -0.26179444789886475, -0.42772865295410156, 0.38232526183128357, -0.34424999356269836, -0.013700839132070541, 0.06267830729484558, 0.03220263868570328, 0.008641096763312817, 0.06975335627794266, -0.293122261762619, 0.2307129055261612, 0.5647791028022766, -0.17950685322284698, -0.15505093336105347, 0.3422781825065613, 0.5830680727958679, 0.09015671908855438, 0.21829605102539062, -0.3145771920681, -0.36300477385520935, 0.01759210415184498, -0.44703587889671326, -0.04031794145703316, -0.007855969481170177, 1.1289069652557373, 0.32531359791755676, 0.12350925803184509, -0.016928303986787796, 0.2860831916332245, 0.5056519508361816, -0.18813404440879822, 0.41012412309646606, 0.3406871557235718, -0.08624880015850067, 
0.2667860686779022, -0.39872363209724426, 0.02829626016318798, 0.556812047958374, 0.08276653289794922, -0.1316125988960266, 0.41733673214912415, 0.4598485231399536, -0.04510819911956787, 0.6398160457611084, -0.11031628400087357, -0.20444677770137787, 0.1461235135793686, 0.08142349869012833, 0.6862602233886719, -0.5517325401306152, 0.09824089705944061, 0.6643730998039246, -0.4433242082595825, -0.2088559865951538, -0.0010434490395709872, -0.006861571222543716, 0.45329102873802185, -0.4951093792915344, -0.35081931948661804, 0.05247737839818001, 0.5716513395309448, -0.13441091775894165, 0.4513288736343384, -0.019169695675373077, 0.09415474534034729, -0.24411427974700928, -0.43105828762054443, 0.4484291076660156, 0.0500045120716095, 0.14404000341892242, -0.44482097029685974, 0.3383842706680298, -0.007835152558982372, -0.42962610721588135, -0.5181068181991577, -0.6389529705047607, -1.1051342487335205, -0.2841700613498688, -0.7280681729316711, 0.5705859661102295, -0.1540089100599289, -0.04595270752906799, -0.09290668368339539, -0.2016221433877945, 0.9551562666893005, -0.36889755725860596, -0.4631878733634949, -0.3798060119152069, -0.03466804325580597, 0.27399906516075134, 0.09181217849254608, -0.1753648966550827, -0.07228908687829971, -0.5609145164489746, -0.6173689365386963, -0.1845182478427887, -0.09885536879301071, -0.15992923080921173, -0.5727373957633972, -0.10458499193191528, -0.12471358478069305, 0.25157099962234497, 0.4502182900905609, 0.5800489187240601, -0.05256539210677147, -0.22801487147808075, 0.31252115964889526, 0.4063433110713959, 0.3790249526500702, -0.12739120423793793, 0.5257766246795654, 0.1238689199090004, -0.338158518075943, -0.5488073229789734, -0.45590096712112427, 0.15981920063495636, 0.053697243332862854, -0.29377493262290955, -0.11290669441223145, -0.4558066129684448, 0.09708049893379211, -0.5497621893882751, 0.6952877044677734, -0.2949441373348236, -0.20568670332431793, -0.25066909193992615, 0.20788194239139557, 0.0828804224729538, 
-0.3205100893974304, 0.015627941116690636, -0.5175909996032715, -0.5469081997871399, 0.0003189857234247029, 0.18069560825824738, -0.1572089046239853, -0.2143837958574295, 0.1276681274175644, -0.06948426365852356, -0.8026075959205627, 0.2085910588502884, 0.6298701763153076, -0.08145620673894882, 0.1882135272026062, 0.2212427854537964, 0.6753458976745605, 0.2589833438396454, -0.019339052960276604, 0.4337731897830963, -0.10927686840295792, -0.07546009868383408, -0.3820595145225525, 0.36630532145500183, -0.07086826115846634, -0.2908079922199249, 0.4737686514854431, 0.3572714626789093, 0.22676624357700348, -0.055919669568538666, 0.16011914610862732, 0.668528139591217, -0.0632222592830658, 0.41762444376945496, 0.4021649658679962, 0.34123528003692627, -0.3970247507095337, -0.4987843632698059, -0.3952328562736511, 0.09697228670120239, 0.24558410048484802, -0.6615297198295593, 0.14615224301815033, 0.21690259873867035, -0.407555490732193, 0.16762562096118927, 0.5087302923202515, 0.5124941468238831, -0.4301638901233673, -0.5254198908805847, -0.03578983247280121, 0.6652815937995911, 0.1367838978767395, 0.04898500815033913, 0.22097738087177277, 0.22577279806137085, 0.18962030112743378, -0.1829565018415451, 0.22661149501800537, 0.09632713347673416, -0.19109275937080383, 0.27763375639915466, 0.345638245344162, 0.29595956206321716, -0.2829558849334717, -0.24633005261421204, 0.5555241107940674, -0.32974979281425476, 0.576398491859436, -0.08506874740123749, -0.43652257323265076, -0.5197297930717468, 0.21043506264686584, 0.17639590799808502, 0.34317806363105774, -0.23980486392974854, 0.3347528874874115, -0.0572253093123436, -0.05466452240943909, 0.17400754988193512, -0.05173032730817795, 0.38616421818733215, -0.48052704334259033, 0.05676288530230522, 0.026928268373012543, 0.6391205787658691, -0.12749797105789185, 0.566977858543396, -0.3032076358795166, 0.25249603390693665, 0.4586711525917053, 0.232834592461586, -0.2822183668613434, 0.048858266323804855, 0.37341058254241943, 
0.16205346584320068, -0.4038492441177368, 0.4429946541786194, -0.40037158131599426, -23.993234634399414, 0.24980571866035461, 0.4150451421737671, -0.23960265517234802, 0.036313991993665695, -0.38664042949676514, 0.23902375996112823, -0.6042793393135071, 0.1989133507013321, -0.2675817012786865, 0.4787449240684509, 0.10380223393440247, -0.46924129128456116, -0.27678728103637695, -0.38723224401474, 0.21485862135887146, -0.31382226943969727, -0.04414469376206398, -0.24349211156368256, -0.15458163619041443, -0.154371976852417, -0.23981007933616638, -0.3071669936180115, 0.3942725658416748, 0.0059698401018977165, 0.1786767989397049, -0.07938949763774872, 0.09174051135778427, -0.14799796044826508, 0.08466369658708572, 0.22821781039237976, 0.22926779091358185, 0.3975406289100647, 0.09765399992465973, -0.05198667198419571, -0.09125446528196335, 0.09266557544469833, -0.27045777440071106, 0.560712456703186, -0.10057463496923447, 0.16337138414382935, 0.3094087839126587, 0.24820874631404877, -0.3624880313873291, 0.2507629692554474, 0.2325933873653412, 0.11636116355657578, -0.13626529276371002, -0.39727547764778137, -0.13218556344509125, -0.07369427382946014, 0.32143089175224304, 0.07612144201993942, 0.077667236328125, -0.39083588123321533, 0.4161825478076935, -0.08399072289466858, 0.6586775183677673, 0.5547205209732056, -0.918072521686554, -0.37012046575546265, -0.02266678772866726, -0.15889938175678253, -0.017171310260891914, 0.08516157418489456, 0.7007209658622742, 0.07753214240074158, 0.0912737250328064, -0.36876052618026733, 0.0002628084912430495, -0.4818188548088074, 0.34477558732032776, 0.4220457375049591, -0.1942380964756012, -0.13366994261741638, -0.5772868990898132, 0.1496095508337021, 0.32970646023750305, -0.08841586112976074, 0.3859923183917999, -0.7048061490058899, -0.047979701310396194, 0.2619478702545166, -0.2080172300338745, 0.17837363481521606, 0.4519639313220978, -0.04576873779296875, 0.5676679015159607, -0.01632002554833889, 0.06074433773756027, 
0.04366881772875786, 0.3691191077232361, 0.4438380002975464, 0.892859697341919, -0.38266414403915405, -0.17105217278003693, -0.4833928346633911, 0.10597565025091171, -0.8416383862495422, -0.3358423113822937, -0.4972095489501953, 0.8026090264320374, 0.4738699197769165, 0.5420113205909729, -0.18209899961948395, 0.025572706013917923, 0.5503890514373779, -0.7415544390678406, -0.004456056747585535, 0.09060308337211609, -0.06921620666980743, 0.28313523530960083, -0.23894280195236206, -0.003468424314633012, 0.42870768904685974, -0.10204873234033585, -0.22206783294677734, 0.13032850623130798, -0.07456564158201218, -0.2771202027797699, -0.6678699254989624, -0.4295669496059418, -0.041554760187864304, 0.2407056987285614, -0.36077603697776794, -0.3744293451309204, 0.05598034709692001, 0.2183098942041397, 0.036429740488529205, 0.12876179814338684, 0.3160586655139923, -0.6030619144439697, -0.3281005024909973, -0.06157254800200462, -0.15492090582847595, -0.17076070606708527, -0.023488130420446396, 0.31810036301612854, -0.03181370347738266, -0.37400949001312256, -0.6342881917953491, 0.3433243930339813, 0.5462055802345276, -0.15907754004001617, 0.03926275297999382, -0.45997416973114014, 0.553528368473053, -0.04609813541173935, 0.5675085186958313, 0.0021735099144279957, -0.30429476499557495, 0.34237903356552124, 0.5962918996810913, 0.08192683011293411, 0.1427510380744934, -0.1705242395401001, 0.1385565847158432, 0.047381602227687836, -0.0896889939904213, -0.16219133138656616, 0.3457886576652527, -0.4034489691257477, -0.11764799803495407, -0.3958416283130646, 0.15397752821445465, -0.09389283508062363, -0.14679959416389465, 0.005999730434268713, 0.32429641485214233, 0.14203940331935883, 0.021727170795202255, 0.4734056890010834, 0.14526048302650452, -0.7075374722480774, -0.06776705384254456, 0.01768682710826397, 0.07005852460861206, -0.1770123690366745, 0.2929307222366333, -0.1282261461019516, 0.32062336802482605, 0.6114449501037598, -0.35943326354026794, 0.24337653815746307, 
"""

Actual indexing is done here.

Reads a newline-delimited JSON file (one bulk action per line, the
format written by create_document.py) and sends every document to
Elasticsearch through the bulk helper.

"""

import json
from argparse import ArgumentParser

# Address of the Elasticsearch node. The scheme is spelled out because
# newer client releases refuse host strings without one.
ES_HOST = 'http://localhost:9200'


def load_dataset(path):
    """Load the documents stored in a JSON-lines file.

    Args:
        path: path to a file holding one JSON object per line.

    Returns:
        A list with one decoded object per non-blank line, in file order.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    with open(path, encoding='utf-8') as f:
        # Blank lines (e.g. a stray trailing newline) carry no document
        # and would otherwise make json.loads fail.
        return [json.loads(line) for line in f if line.strip()]


def main(args):
    """Index every document of ``args.data`` into Elasticsearch.

    Args:
        args: parsed command-line arguments; only ``args.data`` (path to
            the JSON-lines file) is read.
    """
    # Imported here so that load_dataset stays importable on machines
    # that do not have the elasticsearch client installed.
    from elasticsearch import Elasticsearch
    from elasticsearch.helpers import bulk

    client = Elasticsearch(ES_HOST)
    # The whole file is parsed before anything is sent, so a malformed
    # line aborts the run before any document is indexed.
    docs = load_dataset(args.data)
    bulk(client, docs)


if __name__ == '__main__':
    parser = ArgumentParser(description='indexing ES documents.')
    # Required: without it main() would try to open None.
    parser.add_argument('--data', required=True, help='ES documents.')
    args = parser.parse_args()
    main(args)