├── .gitignore ├── .vscode └── settings.json ├── LICENSE ├── README.md ├── docker └── weaviate │ ├── docker-compose-cpu.yml │ └── docker-compose-gpu.yml ├── requirements.txt └── src ├── ali ├── 1_file_to_pickles.py ├── 2_pickle_to_opensearch.py ├── 3_pickle_to_pinecone.py ├── legancy │ ├── 1_docx_pickle.py │ ├── 1_docx_pickle_opensearch.py │ ├── 1_docx_pickle_pinecone.py │ ├── 2_pptx_pickle.py │ ├── 2_pptx_pickle_opensearch.py │ ├── 2_pptx_pickle_pinecone.py │ ├── 3_pdf_pickle.py │ ├── 3_pdf_pickle_opensearch.py │ ├── 3_pdf_pickle_pinecone.py │ ├── 5_chunk_by_title_pdf_pickle.py │ ├── 6_chunk_by_title_pdf_pickle_with_page_number.py │ ├── 7_chunk_by_title_pdf_pickle_pinecone_with_switch.py │ ├── 99_delete_pinecone_by_namespace.py │ ├── chunk_by_title_docx_weaviate.py │ ├── chunk_by_title_from_pickle_to_weaviate.py │ ├── chunk_by_title_papers_weaviate.py │ ├── chunk_by_title_pdf_pickle_with_tools.py │ ├── chunk_by_title_pdf_weaviate.py │ ├── chunk_by_title_pptx_pickle_with_detail_message.py │ └── chunk_by_title_pptx_weaviate.py └── tools │ ├── chunk_by_sci_pdf.py │ ├── unstructure_docx.py │ ├── unstructure_pdf.py │ ├── unstructure_pptx.py │ ├── vision.py │ └── weaviate_basic_functions.py ├── edu_textbooks ├── QA.py ├── chunk_by_title_0.py ├── chunk_by_title_1.py ├── pickle_to_opensearch_aws.py ├── pickle_to_pinecone_aws.py ├── split_list.py └── tools │ ├── unstructure_docx.py │ ├── unstructure_pdf.py │ └── vision.py ├── education ├── 1_file2pickle.py ├── legacy │ ├── 0_create_opensearch_index.py │ ├── 0_pdf_pickle_service_0.py │ ├── 0_pdf_pickle_service_1.py │ ├── 0_pdf_pickle_service_2.py │ ├── 0_pickle_service.py │ ├── 1_chunk_by_title_docx_pickle.py │ ├── 2_chunk_by_title_pdf_pickle.py │ ├── 3_chunk_by_title_pptx_pickle.py │ ├── 4_chunk_by_title_pickle_embedding.py │ ├── 4_pickle_to_pinecone.py │ ├── 5_pickle_to_opensearch.py │ ├── 5_pickle_to_opensearch_aws.py │ ├── 5_pickle_to_pinecone_aws.py │ ├── 98_rename.py │ ├── 99_delete_pinecone_by_namespace.py │ ├── chunk_by_title_docx_weaviate.py │ ├── chunk_by_title_from_pickle_to_weaviate.py │ ├── chunk_by_title_papers_weaviate.py │ ├── chunk_by_title_pdf_pickle.py │ ├── chunk_by_title_pdf_pickle_pinecone.py │ ├── chunk_by_title_pdf_pickle_pinecone_with_switch.py │ ├── chunk_by_title_pdf_pickle_with_page_number.py │ ├── chunk_by_title_pdf_pickle_with_tools.py │ ├── chunk_by_title_pdf_weaviate.py │ ├── chunk_by_title_pptx_pickle_pinecone.py │ ├── chunk_by_title_pptx_pickle_with_detail_message.py │ └── chunk_by_title_pptx_weaviate.py └── tools │ ├── chunk_by_sci_pdf.py │ ├── unstructure_docx.py │ ├── unstructure_pdf.py │ ├── unstructure_pptx.py │ ├── vision.py │ └── weaviate_basic_functions.py ├── esg ├── 1_file2pickle.py ├── 2_update_unstructure_time.py ├── 3_pickle_to_opensearch_aws.py ├── 3_pickle_to_pinecone.py ├── 3_pickle_to_pinecone_aws.py ├── 98_rename.py ├── legacy │ ├── 0_create_opensearch_index.py │ ├── 1_chunk_by_title.py │ ├── 1_chunk_by_title_0.py │ ├── 1_chunk_by_title_1.py │ ├── 1_chunk_by_title_2.py │ ├── 1_chunk_by_title_3.py │ ├── 2_embedding_init.py │ ├── 3_chunk_by_title_pages.py │ ├── 3_pickle_to_opensearch.py │ ├── 99_delete_pinecone_by_namespace.py │ ├── download_pdf.py │ ├── record_chunk.py │ └── upload_pdf.py └── tools │ ├── unstructure_pdf.py │ ├── unstructure_pdf_pages.py │ └── vision.py ├── journals ├── 0_pre_screening.py ├── 0_pre_screening_fast.py ├── 1_pickle_to_pinecone.py ├── 2_pickle_to_opensearch_aws copy.py ├── 2_pickle_to_opensearch_aws.py ├── 2_pickle_to_pinecone_aws.py ├── file_to_pickle.py ├── file_to_pickle1.py ├── file_to_pickle2.py ├── file_to_pickle3.py ├── file_to_pickle4.py ├── legacy │ ├── chunk_by_title_sci.py │ ├── chunk_by_title_sci_0.py │ ├── chunk_by_title_sci_1.py │ ├── chunk_by_title_sci_2.py │ └── chunk_by_title_sci_3.py ├── read_pickle.py ├── record_chunk.py ├── rename.py ├── split.py ├── tools │ ├── chunk_by_sci_pdf.py │ └── vision.py └── update_time.py ├── legacy ├── chunk_by_title copy.py ├── chunk_by_title_and_content_docx_pinecone.py ├── chunk_by_title_docx_format.py ├── chunk_by_title_docx_jsonl.py ├── chunk_by_title_pdf.py ├── chunk_by_title_pdf_docx_SCI_Paper.py ├── chunk_by_title_pdf_embedding_compare.py ├── ecoinvent_extract_pdf.py ├── ecoinvent_split_pdf.py ├── ecoinvent_to_df.py ├── fine_tune.py ├── gpu_detect.py ├── pinecone_serverless.py ├── tools │ ├── embedding.py │ ├── func_calling.py │ ├── unlock.py │ ├── unstructure_docx.py │ ├── unstructure_pdf.py │ ├── unstructure_pptx.py │ ├── vision.py │ └── weaviate_basic_functions.py ├── xata_ask_ai.py └── xata_upload.py ├── patents ├── 1_pickle_2_opensearch_aws.py ├── 1_pickle_2_pinecone.py └── 99_delete_pinecone_by_namespace.py ├── pptx └── chunk_by_title_pptx_weaviate.py ├── reports ├── 1_chunk_by_title.py ├── 2_embedding_init.py ├── 3_pickle_to_opensearch.py ├── 3_pickle_to_opensearch_aws.py ├── 3_pickle_to_pinecone.py ├── 98_file_rename.py ├── 99_delete_pinecone_by_namespace.py ├── download_pdf.py ├── tools │ ├── unstructure_pdf.py │ └── vision.py └── upload_pdf.py ├── standards ├── 1_file2pickle.py ├── 2_pickle_to_opensearch.py ├── 3_pickle_to_pinecone.py ├── legacy │ ├── 1_chunk_by_title.py │ ├── 2_embedding_init.py │ ├── 3_pickle_to_opensearch.py │ ├── 98_file_rename.py │ ├── 99_delete_pinecone_by_namespace.py │ ├── remove_duplicates.py │ └── unstructure.py ├── tools │ ├── unstructure_pdf.py │ └── vision.py └── unstructure.py ├── tools ├── remove.py └── test_gpu.py └── weaviate ├── create_collection.py └── test_connection.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/.gitignore -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/.vscode/settings.json -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/README.md -------------------------------------------------------------------------------- /docker/weaviate/docker-compose-cpu.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/docker/weaviate/docker-compose-cpu.yml -------------------------------------------------------------------------------- /docker/weaviate/docker-compose-gpu.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/docker/weaviate/docker-compose-gpu.yml -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/requirements.txt -------------------------------------------------------------------------------- /src/ali/1_file_to_pickles.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/ali/1_file_to_pickles.py -------------------------------------------------------------------------------- /src/ali/2_pickle_to_opensearch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/ali/2_pickle_to_opensearch.py -------------------------------------------------------------------------------- /src/ali/3_pickle_to_pinecone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/ali/3_pickle_to_pinecone.py -------------------------------------------------------------------------------- /src/ali/legancy/1_docx_pickle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/ali/legancy/1_docx_pickle.py -------------------------------------------------------------------------------- /src/ali/legancy/1_docx_pickle_opensearch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/ali/legancy/1_docx_pickle_opensearch.py -------------------------------------------------------------------------------- /src/ali/legancy/1_docx_pickle_pinecone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/ali/legancy/1_docx_pickle_pinecone.py -------------------------------------------------------------------------------- /src/ali/legancy/2_pptx_pickle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/ali/legancy/2_pptx_pickle.py -------------------------------------------------------------------------------- /src/ali/legancy/2_pptx_pickle_opensearch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/ali/legancy/2_pptx_pickle_opensearch.py -------------------------------------------------------------------------------- /src/ali/legancy/2_pptx_pickle_pinecone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/ali/legancy/2_pptx_pickle_pinecone.py -------------------------------------------------------------------------------- /src/ali/legancy/3_pdf_pickle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/ali/legancy/3_pdf_pickle.py -------------------------------------------------------------------------------- /src/ali/legancy/3_pdf_pickle_opensearch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/ali/legancy/3_pdf_pickle_opensearch.py -------------------------------------------------------------------------------- /src/ali/legancy/3_pdf_pickle_pinecone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/ali/legancy/3_pdf_pickle_pinecone.py -------------------------------------------------------------------------------- /src/ali/legancy/5_chunk_by_title_pdf_pickle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/ali/legancy/5_chunk_by_title_pdf_pickle.py -------------------------------------------------------------------------------- /src/ali/legancy/6_chunk_by_title_pdf_pickle_with_page_number.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/ali/legancy/6_chunk_by_title_pdf_pickle_with_page_number.py -------------------------------------------------------------------------------- /src/ali/legancy/7_chunk_by_title_pdf_pickle_pinecone_with_switch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/ali/legancy/7_chunk_by_title_pdf_pickle_pinecone_with_switch.py -------------------------------------------------------------------------------- /src/ali/legancy/99_delete_pinecone_by_namespace.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/ali/legancy/99_delete_pinecone_by_namespace.py -------------------------------------------------------------------------------- /src/ali/legancy/chunk_by_title_docx_weaviate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/ali/legancy/chunk_by_title_docx_weaviate.py -------------------------------------------------------------------------------- /src/ali/legancy/chunk_by_title_from_pickle_to_weaviate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/ali/legancy/chunk_by_title_from_pickle_to_weaviate.py -------------------------------------------------------------------------------- /src/ali/legancy/chunk_by_title_papers_weaviate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/ali/legancy/chunk_by_title_papers_weaviate.py -------------------------------------------------------------------------------- /src/ali/legancy/chunk_by_title_pdf_pickle_with_tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/ali/legancy/chunk_by_title_pdf_pickle_with_tools.py -------------------------------------------------------------------------------- /src/ali/legancy/chunk_by_title_pdf_weaviate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/ali/legancy/chunk_by_title_pdf_weaviate.py -------------------------------------------------------------------------------- /src/ali/legancy/chunk_by_title_pptx_pickle_with_detail_message.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/ali/legancy/chunk_by_title_pptx_pickle_with_detail_message.py -------------------------------------------------------------------------------- /src/ali/legancy/chunk_by_title_pptx_weaviate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/ali/legancy/chunk_by_title_pptx_weaviate.py -------------------------------------------------------------------------------- /src/ali/tools/chunk_by_sci_pdf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/ali/tools/chunk_by_sci_pdf.py -------------------------------------------------------------------------------- /src/ali/tools/unstructure_docx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/ali/tools/unstructure_docx.py -------------------------------------------------------------------------------- /src/ali/tools/unstructure_pdf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/ali/tools/unstructure_pdf.py -------------------------------------------------------------------------------- /src/ali/tools/unstructure_pptx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/ali/tools/unstructure_pptx.py -------------------------------------------------------------------------------- /src/ali/tools/vision.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/ali/tools/vision.py -------------------------------------------------------------------------------- /src/ali/tools/weaviate_basic_functions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/ali/tools/weaviate_basic_functions.py -------------------------------------------------------------------------------- /src/edu_textbooks/QA.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/edu_textbooks/QA.py -------------------------------------------------------------------------------- /src/edu_textbooks/chunk_by_title_0.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/edu_textbooks/chunk_by_title_0.py -------------------------------------------------------------------------------- /src/edu_textbooks/chunk_by_title_1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/edu_textbooks/chunk_by_title_1.py -------------------------------------------------------------------------------- /src/edu_textbooks/pickle_to_opensearch_aws.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/edu_textbooks/pickle_to_opensearch_aws.py -------------------------------------------------------------------------------- /src/edu_textbooks/pickle_to_pinecone_aws.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/edu_textbooks/pickle_to_pinecone_aws.py -------------------------------------------------------------------------------- /src/edu_textbooks/split_list.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/edu_textbooks/split_list.py -------------------------------------------------------------------------------- /src/edu_textbooks/tools/unstructure_docx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/edu_textbooks/tools/unstructure_docx.py -------------------------------------------------------------------------------- /src/edu_textbooks/tools/unstructure_pdf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/edu_textbooks/tools/unstructure_pdf.py -------------------------------------------------------------------------------- /src/edu_textbooks/tools/vision.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/edu_textbooks/tools/vision.py -------------------------------------------------------------------------------- /src/education/1_file2pickle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/education/1_file2pickle.py -------------------------------------------------------------------------------- /src/education/legacy/0_create_opensearch_index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/education/legacy/0_create_opensearch_index.py -------------------------------------------------------------------------------- /src/education/legacy/0_pdf_pickle_service_0.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/education/legacy/0_pdf_pickle_service_0.py -------------------------------------------------------------------------------- /src/education/legacy/0_pdf_pickle_service_1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/education/legacy/0_pdf_pickle_service_1.py -------------------------------------------------------------------------------- /src/education/legacy/0_pdf_pickle_service_2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/education/legacy/0_pdf_pickle_service_2.py -------------------------------------------------------------------------------- /src/education/legacy/0_pickle_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/education/legacy/0_pickle_service.py -------------------------------------------------------------------------------- /src/education/legacy/1_chunk_by_title_docx_pickle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/education/legacy/1_chunk_by_title_docx_pickle.py -------------------------------------------------------------------------------- /src/education/legacy/2_chunk_by_title_pdf_pickle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/education/legacy/2_chunk_by_title_pdf_pickle.py -------------------------------------------------------------------------------- /src/education/legacy/3_chunk_by_title_pptx_pickle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/education/legacy/3_chunk_by_title_pptx_pickle.py -------------------------------------------------------------------------------- /src/education/legacy/4_chunk_by_title_pickle_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/education/legacy/4_chunk_by_title_pickle_embedding.py -------------------------------------------------------------------------------- /src/education/legacy/4_pickle_to_pinecone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/education/legacy/4_pickle_to_pinecone.py -------------------------------------------------------------------------------- /src/education/legacy/5_pickle_to_opensearch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/education/legacy/5_pickle_to_opensearch.py -------------------------------------------------------------------------------- /src/education/legacy/5_pickle_to_opensearch_aws.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/education/legacy/5_pickle_to_opensearch_aws.py -------------------------------------------------------------------------------- /src/education/legacy/5_pickle_to_pinecone_aws.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/education/legacy/5_pickle_to_pinecone_aws.py -------------------------------------------------------------------------------- /src/education/legacy/98_rename.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/education/legacy/98_rename.py -------------------------------------------------------------------------------- /src/education/legacy/99_delete_pinecone_by_namespace.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/education/legacy/99_delete_pinecone_by_namespace.py -------------------------------------------------------------------------------- /src/education/legacy/chunk_by_title_docx_weaviate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/education/legacy/chunk_by_title_docx_weaviate.py -------------------------------------------------------------------------------- /src/education/legacy/chunk_by_title_from_pickle_to_weaviate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/education/legacy/chunk_by_title_from_pickle_to_weaviate.py -------------------------------------------------------------------------------- /src/education/legacy/chunk_by_title_papers_weaviate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/education/legacy/chunk_by_title_papers_weaviate.py -------------------------------------------------------------------------------- /src/education/legacy/chunk_by_title_pdf_pickle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/education/legacy/chunk_by_title_pdf_pickle.py -------------------------------------------------------------------------------- /src/education/legacy/chunk_by_title_pdf_pickle_pinecone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/education/legacy/chunk_by_title_pdf_pickle_pinecone.py -------------------------------------------------------------------------------- /src/education/legacy/chunk_by_title_pdf_pickle_pinecone_with_switch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/education/legacy/chunk_by_title_pdf_pickle_pinecone_with_switch.py -------------------------------------------------------------------------------- /src/education/legacy/chunk_by_title_pdf_pickle_with_page_number.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/education/legacy/chunk_by_title_pdf_pickle_with_page_number.py -------------------------------------------------------------------------------- /src/education/legacy/chunk_by_title_pdf_pickle_with_tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/education/legacy/chunk_by_title_pdf_pickle_with_tools.py -------------------------------------------------------------------------------- /src/education/legacy/chunk_by_title_pdf_weaviate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/education/legacy/chunk_by_title_pdf_weaviate.py -------------------------------------------------------------------------------- /src/education/legacy/chunk_by_title_pptx_pickle_pinecone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/education/legacy/chunk_by_title_pptx_pickle_pinecone.py -------------------------------------------------------------------------------- /src/education/legacy/chunk_by_title_pptx_pickle_with_detail_message.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/education/legacy/chunk_by_title_pptx_pickle_with_detail_message.py -------------------------------------------------------------------------------- /src/education/legacy/chunk_by_title_pptx_weaviate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/education/legacy/chunk_by_title_pptx_weaviate.py -------------------------------------------------------------------------------- /src/education/tools/chunk_by_sci_pdf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/education/tools/chunk_by_sci_pdf.py -------------------------------------------------------------------------------- /src/education/tools/unstructure_docx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/education/tools/unstructure_docx.py -------------------------------------------------------------------------------- /src/education/tools/unstructure_pdf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/education/tools/unstructure_pdf.py -------------------------------------------------------------------------------- /src/education/tools/unstructure_pptx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/education/tools/unstructure_pptx.py -------------------------------------------------------------------------------- /src/education/tools/vision.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/education/tools/vision.py -------------------------------------------------------------------------------- /src/education/tools/weaviate_basic_functions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/education/tools/weaviate_basic_functions.py -------------------------------------------------------------------------------- /src/esg/1_file2pickle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/esg/1_file2pickle.py -------------------------------------------------------------------------------- /src/esg/2_update_unstructure_time.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/esg/2_update_unstructure_time.py -------------------------------------------------------------------------------- /src/esg/3_pickle_to_opensearch_aws.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/esg/3_pickle_to_opensearch_aws.py -------------------------------------------------------------------------------- /src/esg/3_pickle_to_pinecone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/esg/3_pickle_to_pinecone.py -------------------------------------------------------------------------------- /src/esg/3_pickle_to_pinecone_aws.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/esg/3_pickle_to_pinecone_aws.py -------------------------------------------------------------------------------- /src/esg/98_rename.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/esg/98_rename.py -------------------------------------------------------------------------------- /src/esg/legacy/0_create_opensearch_index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/esg/legacy/0_create_opensearch_index.py -------------------------------------------------------------------------------- /src/esg/legacy/1_chunk_by_title.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/esg/legacy/1_chunk_by_title.py -------------------------------------------------------------------------------- /src/esg/legacy/1_chunk_by_title_0.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/esg/legacy/1_chunk_by_title_0.py -------------------------------------------------------------------------------- /src/esg/legacy/1_chunk_by_title_1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/esg/legacy/1_chunk_by_title_1.py -------------------------------------------------------------------------------- /src/esg/legacy/1_chunk_by_title_2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/esg/legacy/1_chunk_by_title_2.py -------------------------------------------------------------------------------- /src/esg/legacy/1_chunk_by_title_3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/esg/legacy/1_chunk_by_title_3.py -------------------------------------------------------------------------------- /src/esg/legacy/2_embedding_init.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/esg/legacy/2_embedding_init.py -------------------------------------------------------------------------------- /src/esg/legacy/3_chunk_by_title_pages.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/esg/legacy/3_chunk_by_title_pages.py -------------------------------------------------------------------------------- /src/esg/legacy/3_pickle_to_opensearch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/esg/legacy/3_pickle_to_opensearch.py -------------------------------------------------------------------------------- /src/esg/legacy/99_delete_pinecone_by_namespace.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/esg/legacy/99_delete_pinecone_by_namespace.py -------------------------------------------------------------------------------- /src/esg/legacy/download_pdf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/esg/legacy/download_pdf.py -------------------------------------------------------------------------------- /src/esg/legacy/record_chunk.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/esg/legacy/record_chunk.py -------------------------------------------------------------------------------- /src/esg/legacy/upload_pdf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/esg/legacy/upload_pdf.py -------------------------------------------------------------------------------- /src/esg/tools/unstructure_pdf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/esg/tools/unstructure_pdf.py -------------------------------------------------------------------------------- /src/esg/tools/unstructure_pdf_pages.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/esg/tools/unstructure_pdf_pages.py -------------------------------------------------------------------------------- /src/esg/tools/vision.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/esg/tools/vision.py -------------------------------------------------------------------------------- /src/journals/0_pre_screening.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/journals/0_pre_screening.py -------------------------------------------------------------------------------- /src/journals/0_pre_screening_fast.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/journals/0_pre_screening_fast.py -------------------------------------------------------------------------------- /src/journals/1_pickle_to_pinecone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/journals/1_pickle_to_pinecone.py -------------------------------------------------------------------------------- /src/journals/2_pickle_to_opensearch_aws copy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/journals/2_pickle_to_opensearch_aws copy.py -------------------------------------------------------------------------------- /src/journals/2_pickle_to_opensearch_aws.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/journals/2_pickle_to_opensearch_aws.py -------------------------------------------------------------------------------- /src/journals/2_pickle_to_pinecone_aws.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/journals/2_pickle_to_pinecone_aws.py -------------------------------------------------------------------------------- /src/journals/file_to_pickle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/journals/file_to_pickle.py -------------------------------------------------------------------------------- /src/journals/file_to_pickle1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/journals/file_to_pickle1.py -------------------------------------------------------------------------------- /src/journals/file_to_pickle2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/journals/file_to_pickle2.py -------------------------------------------------------------------------------- /src/journals/file_to_pickle3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/journals/file_to_pickle3.py -------------------------------------------------------------------------------- /src/journals/file_to_pickle4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/journals/file_to_pickle4.py -------------------------------------------------------------------------------- /src/journals/legacy/chunk_by_title_sci.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/journals/legacy/chunk_by_title_sci.py -------------------------------------------------------------------------------- /src/journals/legacy/chunk_by_title_sci_0.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/journals/legacy/chunk_by_title_sci_0.py -------------------------------------------------------------------------------- /src/journals/legacy/chunk_by_title_sci_1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/journals/legacy/chunk_by_title_sci_1.py -------------------------------------------------------------------------------- /src/journals/legacy/chunk_by_title_sci_2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/journals/legacy/chunk_by_title_sci_2.py -------------------------------------------------------------------------------- /src/journals/legacy/chunk_by_title_sci_3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/journals/legacy/chunk_by_title_sci_3.py -------------------------------------------------------------------------------- /src/journals/read_pickle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/journals/read_pickle.py -------------------------------------------------------------------------------- /src/journals/record_chunk.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/journals/record_chunk.py -------------------------------------------------------------------------------- /src/journals/rename.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/journals/rename.py -------------------------------------------------------------------------------- /src/journals/split.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/journals/split.py -------------------------------------------------------------------------------- /src/journals/tools/chunk_by_sci_pdf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/journals/tools/chunk_by_sci_pdf.py -------------------------------------------------------------------------------- /src/journals/tools/vision.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/journals/tools/vision.py -------------------------------------------------------------------------------- /src/journals/update_time.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/journals/update_time.py -------------------------------------------------------------------------------- /src/legacy/chunk_by_title copy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/legacy/chunk_by_title copy.py -------------------------------------------------------------------------------- /src/legacy/chunk_by_title_and_content_docx_pinecone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/legacy/chunk_by_title_and_content_docx_pinecone.py -------------------------------------------------------------------------------- /src/legacy/chunk_by_title_docx_format.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/legacy/chunk_by_title_docx_format.py -------------------------------------------------------------------------------- /src/legacy/chunk_by_title_docx_jsonl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/legacy/chunk_by_title_docx_jsonl.py -------------------------------------------------------------------------------- /src/legacy/chunk_by_title_pdf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/legacy/chunk_by_title_pdf.py -------------------------------------------------------------------------------- /src/legacy/chunk_by_title_pdf_docx_SCI_Paper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/legacy/chunk_by_title_pdf_docx_SCI_Paper.py -------------------------------------------------------------------------------- /src/legacy/chunk_by_title_pdf_embedding_compare.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/legacy/chunk_by_title_pdf_embedding_compare.py -------------------------------------------------------------------------------- /src/legacy/ecoinvent_extract_pdf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/legacy/ecoinvent_extract_pdf.py -------------------------------------------------------------------------------- /src/legacy/ecoinvent_split_pdf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/legacy/ecoinvent_split_pdf.py -------------------------------------------------------------------------------- /src/legacy/ecoinvent_to_df.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/legacy/ecoinvent_to_df.py -------------------------------------------------------------------------------- /src/legacy/fine_tune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/legacy/fine_tune.py -------------------------------------------------------------------------------- /src/legacy/gpu_detect.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/legacy/gpu_detect.py -------------------------------------------------------------------------------- /src/legacy/pinecone_serverless.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/legacy/pinecone_serverless.py -------------------------------------------------------------------------------- /src/legacy/tools/embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/legacy/tools/embedding.py -------------------------------------------------------------------------------- /src/legacy/tools/func_calling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/legacy/tools/func_calling.py -------------------------------------------------------------------------------- /src/legacy/tools/unlock.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/legacy/tools/unlock.py -------------------------------------------------------------------------------- /src/legacy/tools/unstructure_docx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/legacy/tools/unstructure_docx.py -------------------------------------------------------------------------------- /src/legacy/tools/unstructure_pdf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/legacy/tools/unstructure_pdf.py -------------------------------------------------------------------------------- /src/legacy/tools/unstructure_pptx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/legacy/tools/unstructure_pptx.py -------------------------------------------------------------------------------- /src/legacy/tools/vision.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/legacy/tools/vision.py -------------------------------------------------------------------------------- /src/legacy/tools/weaviate_basic_functions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/legacy/tools/weaviate_basic_functions.py -------------------------------------------------------------------------------- /src/legacy/xata_ask_ai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/legacy/xata_ask_ai.py -------------------------------------------------------------------------------- /src/legacy/xata_upload.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/legacy/xata_upload.py -------------------------------------------------------------------------------- /src/patents/1_pickle_2_opensearch_aws.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/patents/1_pickle_2_opensearch_aws.py -------------------------------------------------------------------------------- /src/patents/1_pickle_2_pinecone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/patents/1_pickle_2_pinecone.py -------------------------------------------------------------------------------- /src/patents/99_delete_pinecone_by_namespace.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/patents/99_delete_pinecone_by_namespace.py -------------------------------------------------------------------------------- /src/pptx/chunk_by_title_pptx_weaviate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/pptx/chunk_by_title_pptx_weaviate.py -------------------------------------------------------------------------------- /src/reports/1_chunk_by_title.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/reports/1_chunk_by_title.py -------------------------------------------------------------------------------- /src/reports/2_embedding_init.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/reports/2_embedding_init.py -------------------------------------------------------------------------------- /src/reports/3_pickle_to_opensearch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/reports/3_pickle_to_opensearch.py -------------------------------------------------------------------------------- /src/reports/3_pickle_to_opensearch_aws.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/reports/3_pickle_to_opensearch_aws.py -------------------------------------------------------------------------------- /src/reports/3_pickle_to_pinecone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/reports/3_pickle_to_pinecone.py -------------------------------------------------------------------------------- /src/reports/98_file_rename.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/reports/98_file_rename.py -------------------------------------------------------------------------------- /src/reports/99_delete_pinecone_by_namespace.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/reports/99_delete_pinecone_by_namespace.py -------------------------------------------------------------------------------- /src/reports/download_pdf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/reports/download_pdf.py -------------------------------------------------------------------------------- /src/reports/tools/unstructure_pdf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/reports/tools/unstructure_pdf.py -------------------------------------------------------------------------------- /src/reports/tools/vision.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/reports/tools/vision.py -------------------------------------------------------------------------------- /src/reports/upload_pdf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/reports/upload_pdf.py -------------------------------------------------------------------------------- /src/standards/1_file2pickle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/standards/1_file2pickle.py -------------------------------------------------------------------------------- /src/standards/2_pickle_to_opensearch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/standards/2_pickle_to_opensearch.py -------------------------------------------------------------------------------- /src/standards/3_pickle_to_pinecone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/standards/3_pickle_to_pinecone.py -------------------------------------------------------------------------------- /src/standards/legacy/1_chunk_by_title.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/standards/legacy/1_chunk_by_title.py -------------------------------------------------------------------------------- /src/standards/legacy/2_embedding_init.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/standards/legacy/2_embedding_init.py -------------------------------------------------------------------------------- /src/standards/legacy/3_pickle_to_opensearch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/standards/legacy/3_pickle_to_opensearch.py -------------------------------------------------------------------------------- /src/standards/legacy/98_file_rename.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/standards/legacy/98_file_rename.py -------------------------------------------------------------------------------- /src/standards/legacy/99_delete_pinecone_by_namespace.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/standards/legacy/99_delete_pinecone_by_namespace.py -------------------------------------------------------------------------------- /src/standards/legacy/remove_duplicates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/standards/legacy/remove_duplicates.py -------------------------------------------------------------------------------- /src/standards/legacy/unstructure.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/standards/legacy/unstructure.py -------------------------------------------------------------------------------- /src/standards/tools/unstructure_pdf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/standards/tools/unstructure_pdf.py -------------------------------------------------------------------------------- /src/standards/tools/vision.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/standards/tools/vision.py -------------------------------------------------------------------------------- /src/standards/unstructure.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/tools/remove.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/tools/remove.py -------------------------------------------------------------------------------- /src/tools/test_gpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/tools/test_gpu.py -------------------------------------------------------------------------------- /src/weaviate/create_collection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/weaviate/create_collection.py -------------------------------------------------------------------------------- /src/weaviate/test_connection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linancn/TianGong-AI-Unstructure/HEAD/src/weaviate/test_connection.py --------------------------------------------------------------------------------