├── .gitignore ├── Loong ├── LICENSE ├── README.md ├── requirements.txt └── src │ ├── run.sh │ ├── step1_load_data.py │ ├── step2_model_generate.py │ ├── step3_model_evaluate.py │ ├── step4_cal_metric.py │ ├── test.sh │ ├── tmp.json │ ├── utils │ ├── args.py │ ├── config.py │ ├── generate.py │ ├── metric.py │ ├── prompt.py │ ├── token_length.py │ └── util.py │ └── vllm_example.sh ├── README.md ├── do_merge_each_batch.py ├── main.py ├── prompts ├── README.md ├── construct_algorithm.txt ├── construct_catalogue.txt ├── construct_graph.txt ├── construct_table.txt ├── decompose.txt └── route.txt ├── requirements.txt ├── router.py ├── structurizer.py ├── train_router ├── accelerate_configs │ ├── deepspeed_zero1.yaml │ ├── deepspeed_zero2.yaml │ ├── deepspeed_zero3.yaml │ ├── fsdp_qlora.yaml │ ├── multi_gpu.yaml │ └── single_gpu.yaml ├── data │ ├── test.json │ └── train.json ├── dpo.py └── train.sh ├── utilizer.py └── utils └── qwenapi.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | eval_results 3 | intermediate_results 4 | Loong/assets/logo.png 5 | Loong/assets/main_fig.jpg 6 | Loong/config/models/claude3haiku.yaml 7 | Loong/config/models/claude35sonnet.yaml 8 | Loong/config/models/geminipro.yaml 9 | Loong/config/models/glm4.yaml 10 | Loong/config/models/gpt4.yaml 11 | Loong/config/models/gpt4o.yaml 12 | Loong/config/models/qwen2.yaml 13 | Loong/data/loong_process.jsonl 14 | Loong/data/loong.jsonl 15 | Loong/data/doc/README.md 16 | Loong/data/doc/financial/2019-avni123118form10k.txt 17 | Loong/data/doc/financial/2020-avni123119form10k.txt 18 | Loong/data/doc/financial/2020-f10k2019_boxscorebrands.txt 19 | Loong/data/doc/financial/2020-form10-k.txt 20 | Loong/data/doc/financial/2021-avni123120form10k.txt 21 | Loong/data/doc/financial/2021-f10k2020_boxscorebrands.txt 22 | Loong/data/doc/financial/2021-form10-k.txt 23 | Loong/data/doc/financial/2022-aqb-20211231x10k.txt 24 | 
Loong/data/doc/financial/2022-avni123121form10k.txt 25 | Loong/data/doc/financial/2022-f10k2021_boxscorebrands.txt 26 | Loong/data/doc/financial/2022-form10-k.txt 27 | Loong/data/doc/financial/2022-synl-20211231.txt 28 | Loong/data/doc/financial/2023-acnt-20221231.txt 29 | Loong/data/doc/financial/2023-Aetherium Acquisition Corp-n.txt 30 | Loong/data/doc/financial/2023-AMERIGUARD SECURITY SERVICES, INC.-n.txt 31 | Loong/data/doc/financial/2023-aqb-20221231x10k.txt 32 | Loong/data/doc/financial/2023-avni123123form10k.txt 33 | Loong/data/doc/financial/2023-f10k2022_boxscore.txt 34 | Loong/data/doc/financial/2023-form10-k.txt 35 | Loong/data/doc/financial/2024-1st FRANKLIN FINANCIAL CORP-j.txt 36 | Loong/data/doc/financial/2024-4Front Ventures Corp.-j.txt 37 | Loong/data/doc/financial/2024-1847 Holdings LLC-j.txt 38 | Loong/data/doc/financial/2024-acnt-20231231.txt 39 | Loong/data/doc/financial/2024-Acorda Therapeutics, Inc.-j.txt 40 | Loong/data/doc/financial/2024-Acutus Medical, Inc.-j.txt 41 | Loong/data/doc/financial/2024-ADIAL PHARMACEUTICALS, INC.-j.txt 42 | Loong/data/doc/financial/2024-Aditxt, Inc.-j.txt 43 | Loong/data/doc/financial/2024-ADM ENDEAVORS, INC.-j.txt 44 | Loong/data/doc/financial/2024-Aetherium Acquisition Corp-n.txt 45 | Loong/data/doc/financial/2024-Agape ATP Corp-j.txt 46 | Loong/data/doc/financial/2024-AgEagle Aerial Systems Inc.-j.txt 47 | Loong/data/doc/financial/2024-AGRIFORCE GROWING SYSTEMS LTD.-j.txt 48 | Loong/data/doc/financial/2024-Agrify Corp-j.txt 49 | Loong/data/doc/financial/2024-Aileron Therapeutics, Inc.-j.txt 50 | Loong/data/doc/financial/2024-AIM ImmunoTech Inc.-j.txt 51 | Loong/data/doc/financial/2024-Alaunos Therapeutics, Inc.-j.txt 52 | Loong/data/doc/financial/2024-Alpha Investment Inc.-j.txt 53 | Loong/data/doc/financial/2024-Alset Inc.-j.txt 54 | Loong/data/doc/financial/2024-Ameri Metro, Inc. 
(formerly Yellowwood)-j.txt 55 | Loong/data/doc/financial/2024-AMERICAN BATTERY MATERIALS, INC.-j.txt 56 | Loong/data/doc/financial/2024-American Resources Corp-j.txt 57 | Loong/data/doc/financial/2024-AMERICAN SHARED HOSPITAL SERVICES-j.txt 58 | Loong/data/doc/financial/2024-American Strategic Investment Co.-j.txt 59 | Loong/data/doc/financial/2024-AMERIGUARD SECURITY SERVICES, INC.-j.txt 60 | Loong/data/doc/financial/2024-AMERIGUARD SECURITY SERVICES, INC.-n.txt 61 | Loong/data/doc/financial/2024-Ameritek Ventures, Inc.-j.txt 62 | Loong/data/doc/financial/2024-Apple iSports Group, Inc.-j.txt 63 | Loong/data/doc/financial/2024-Appsoft Technologies, Inc.-j.txt 64 | Loong/data/doc/financial/2024-AppTech Payments Corp.-j.txt 65 | Loong/data/doc/financial/2024-APPYEA, INC-j.txt 66 | Loong/data/doc/financial/2024-aqb-20231231x10k.txt 67 | Loong/data/doc/financial/2024-AQUABOUNTY TECHNOLOGIES INC-j.txt 68 | Loong/data/doc/financial/2024-Arax Holdings Corp-j.txt 69 | Loong/data/doc/financial/2024-Arena Group Holdings, Inc.-j.txt 70 | Loong/data/doc/financial/2024-ARVANA INC-j.txt 71 | Loong/data/doc/financial/2024-ASCENT INDUSTRIES CO.-j.txt 72 | Loong/data/doc/financial/2024-Aspira Women's Health Inc.-j.txt 73 | Loong/data/doc/financial/2024-AST SpaceMobile, Inc.-j.txt 74 | Loong/data/doc/financial/2024-ATOSSA THERAPEUTICS, INC.-j.txt 75 | Loong/data/doc/financial/2024-AUDDIA INC.-j.txt 76 | Loong/data/doc/financial/2024-Ault Alliance, Inc.-j.txt 77 | Loong/data/doc/financial/2024-avni123123form10k.txt 78 | Loong/data/doc/financial/2024-AXIM BIOTECHNOLOGIES, INC.-j.txt 79 | Loong/data/doc/financial/2024-B. 
Riley Financial, Inc.-j.txt 80 | Loong/data/doc/financial/2024-Backblaze, Inc.-j.txt 81 | Loong/data/doc/financial/2024-Balance Labs, Inc.-j.txt 82 | Loong/data/doc/financial/2024-BASANITE, INC.-j.txt 83 | Loong/data/doc/financial/2024-BATTALION OIL CORP-j.txt 84 | Loong/data/doc/financial/2024-Beam Global-j.txt 85 | Loong/data/doc/financial/2024-Better Choice Co Inc.-j.txt 86 | Loong/data/doc/financial/2024-Better Home & Finance Holding Co-j.txt 87 | Loong/data/doc/financial/2024-Bio Essence Corp-j.txt 88 | Loong/data/doc/financial/2024-BIOADAPTIVES, INC.-j.txt 89 | Loong/data/doc/financial/2024-BioCorRx Inc.-j.txt 90 | Loong/data/doc/financial/2024-BIOETHICS LTD-j.txt 91 | Loong/data/doc/financial/2024-BIOFORCE NANOSCIENCES HOLDINGS, INC.-j.txt 92 | Loong/data/doc/financial/2024-BIOLARGO, INC.-j.txt 93 | Loong/data/doc/financial/2024-BiomX Inc.-j.txt 94 | Loong/data/doc/financial/2024-BioNexus Gene Lab Corp-j.txt 95 | Loong/data/doc/financial/2024-BIORA THERAPEUTICS, INC.-j.txt 96 | Loong/data/doc/financial/2024-BioSig Technologies, Inc.-j.txt 97 | Loong/data/doc/financial/2024-Biostax Corp.-j.txt 98 | Loong/data/doc/financial/2024-Bitech Technologies Corp-j.txt 99 | Loong/data/doc/financial/2024-BLACKBOXSTOCKS INC.-j.txt 100 | Loong/data/doc/financial/2024-BLUE DOLPHIN ENERGY CO-j.txt 101 | Loong/data/doc/financial/2024-BLUE DOLPHIN ENERGY CO-n.txt 102 | Loong/data/doc/financial/2024-Blue Line Protection Group, Inc.-j.txt 103 | Loong/data/doc/financial/2024-Blue Star Foods Corp.-j.txt 104 | Loong/data/doc/financial/2024-BM Technologies, Inc.-j.txt 105 | Loong/data/doc/financial/2024-Boxabl Inc.-j.txt 106 | Loong/data/doc/financial/2024-BRAINSTORM CELL THERAPEUTICS INC.-j.txt 107 | Loong/data/doc/financial/2024-BRAINSTORM CELL THERAPEUTICS INC.-n.txt 108 | Loong/data/doc/financial/2024-Brand Engagement Network Inc.-j.txt 109 | Loong/data/doc/financial/2024-Breeze Holdings Acquisition Corp.-j.txt 110 | Loong/data/doc/financial/2024-BrewBilt Brewing Co-j.txt 111 | 
Loong/data/doc/financial/2024-Bright Mountain Media, Inc.-j.txt 112 | Loong/data/doc/financial/2024-Broad Street Realty, Inc.-j.txt 113 | Loong/data/doc/financial/2024-BROADWAY FINANCIAL CORP /DE/-j.txt 114 | Loong/data/doc/financial/2024-BT Brands, Inc.-j.txt 115 | Loong/data/doc/financial/2024-BTC Digital Ltd.-j.txt 116 | Loong/data/doc/financial/2024-BurgerFi International, Inc.-j.txt 117 | Loong/data/doc/financial/2024-C & C TOURS, INC.-j.txt 118 | Loong/data/doc/financial/2024-C-Bond Systems, Inc-j.txt 119 | Loong/data/doc/financial/2024-CalEthos, Inc.-j.txt 120 | Loong/data/doc/financial/2024-CaliberCos Inc.-j.txt 121 | Loong/data/doc/financial/2024-Can B Corp-j.txt 122 | Loong/data/doc/financial/2024-CANCER CAPITAL CORP-j.txt 123 | Loong/data/doc/financial/2024-Cano Health, Inc.-j.txt 124 | Loong/data/doc/financial/2024-Canoo Inc.-j.txt 125 | Loong/data/doc/financial/2024-Carisma Therapeutics Inc.-j.txt 126 | Loong/data/doc/financial/2024-Catheter Precision, Inc.-j.txt 127 | Loong/data/doc/financial/2024-Cepton, Inc.-j.txt 128 | Loong/data/doc/financial/2024-CFN Enterprises Inc.-j.txt 129 | Loong/data/doc/financial/2024-Charlie's Holdings, Inc.-j.txt 130 | Loong/data/doc/financial/2024-CHASE PACKAGING CORP-j.txt 131 | Loong/data/doc/financial/2024-CHESAPEAKE GRANITE WASH TRUST-j.txt 132 | Loong/data/doc/financial/2024-Chicken Soup for the Soul Entertainment, Inc.-j.txt 133 | Loong/data/doc/financial/2024-China Foods Holdings Ltd.-j.txt 134 | Loong/data/doc/financial/2024-CHINA PHARMA HOLDINGS, INC.-j.txt 135 | Loong/data/doc/financial/2024-Chosen, Inc.-j.txt 136 | Loong/data/doc/financial/2024-Churchill Capital Corp VII-j.txt 137 | Loong/data/doc/financial/2024-Cidara Therapeutics, Inc.-j.txt 138 | Loong/data/doc/financial/2024-CIM Opportunity Zone Fund, L.P.-j.txt 139 | Loong/data/doc/financial/2024-CIRTRAN CORP-j.txt 140 | Loong/data/doc/financial/2024-CISO Global, Inc.-j.txt 141 | Loong/data/doc/financial/2024-Clean Energy Technologies, Inc.-j.txt 142 | 
Loong/data/doc/financial/2024-Clean Vision Corp-j.txt 143 | Loong/data/doc/financial/2024-CLEARONE INC-j.txt 144 | Loong/data/doc/financial/2024-ClearSign Technologies Corp-j.txt 145 | Loong/data/doc/financial/2024-CNS Pharmaceuticals, Inc.-j.txt 146 | Loong/data/doc/financial/2024-Complete Solaria, Inc.-j.txt 147 | Loong/data/doc/financial/2024-Corner Growth Acquisition Corp.-j.txt 148 | Loong/data/doc/financial/2024-Correlate Energy Corp.-j.txt 149 | Loong/data/doc/financial/2024-Cosmos Group Holdings Inc.-j.txt 150 | Loong/data/doc/financial/2024-Coyni, Inc.-j.txt 151 | Loong/data/doc/financial/2024-CPI AEROSTRUCTURES INC-j.txt 152 | Loong/data/doc/financial/2024-CQENS Technologies Inc.-j.txt 153 | Loong/data/doc/financial/2024-CROSS TIMBERS ROYALTY TRUST-j.txt 154 | Loong/data/doc/financial/2024-Crown Electrokinetics Corp.-j.txt 155 | Loong/data/doc/financial/2024-Crypto Co-j.txt 156 | Loong/data/doc/financial/2024-Cuentas Inc.-j.txt 157 | Loong/data/doc/financial/2024-CUTERA INC-j.txt 158 | Loong/data/doc/financial/2024-Data Call Technologies-j.txt 159 | Loong/data/doc/financial/2024-Data443 Risk Mitigation, Inc.-j.txt 160 | Loong/data/doc/financial/2024-DAWSON GEOPHYSICAL CO-j.txt 161 | Loong/data/doc/financial/2024-DecisionPoint Systems, Inc.-j.txt 162 | Loong/data/doc/financial/2024-DIGITAL ALLY, INC.-j.txt 163 | Loong/data/doc/financial/2024-Digital Brands Group, Inc.-j.txt 164 | Loong/data/doc/financial/2024-Digital Media Solutions, Inc.-j.txt 165 | Loong/data/doc/financial/2024-DISH DBS CORP-j.txt 166 | Loong/data/doc/financial/2024-DIVALL INSURED INCOME PROPERTIES 2 LIMITED PARTNERSHIP-j.txt 167 | Loong/data/doc/financial/2024-Dolphin Entertainment, Inc.-j.txt 168 | Loong/data/doc/financial/2024-Doma Holdings, Inc.-j.txt 169 | Loong/data/doc/financial/2024-Dominari Holdings Inc.-j.txt 170 | Loong/data/doc/financial/2024-Dror Ortho-Design, Inc.-j.txt 171 | Loong/data/doc/financial/2024-DUOS TECHNOLOGIES GROUP, INC.-j.txt 172 | 
Loong/data/doc/financial/2024-Dynamic Shares Trust-j.txt 173 | Loong/data/doc/financial/2024-DYNARESOURCE INC-j.txt 174 | Loong/data/doc/financial/2024-ea0202445-10k_american.txt 175 | Loong/data/doc/financial/2024-Edgemode, Inc.-j.txt 176 | Loong/data/doc/financial/2024-Edible Garden AG Inc-j.txt 177 | Loong/data/doc/financial/2024-Eiger BioPharmaceuticals, Inc.-n.txt 178 | Loong/data/doc/financial/2024-Elvictor Group, Inc.-j.txt 179 | Loong/data/doc/financial/2024-EOS INC.-j.txt 180 | Loong/data/doc/financial/2024-EQUUS TOTAL RETURN, INC.-j.txt 181 | Loong/data/doc/financial/2024-ExchangeRight Income Fund-j.txt 182 | Loong/data/doc/financial/2024-Exela Technologies, Inc.-j.txt 183 | Loong/data/doc/financial/2024-Ezagoo Ltd-j.txt 184 | Loong/data/doc/financial/2024-EzFill Holdings Inc-j.txt 185 | Loong/data/doc/financial/2024-Fathom Digital Manufacturing Corp-j.txt 186 | Loong/data/doc/financial/2024-FLEXIBLE SOLUTIONS INTERNATIONAL INC-j.txt 187 | Loong/data/doc/financial/2024-FlexShopper, Inc.-j.txt 188 | Loong/data/doc/financial/2024-Fluent, Inc.-j.txt 189 | Loong/data/doc/financial/2024-FOCUS UNIVERSAL INC.-j.txt 190 | Loong/data/doc/financial/2024-FORGE INNOVATION DEVELOPMENT CORP.-j.txt 191 | Loong/data/doc/financial/2024-form10-k.txt 192 | Loong/data/doc/financial/2024-Fortune Valley Treasures, Inc.-j.txt 193 | Loong/data/doc/financial/2024-FRACTYL HEALTH, INC.-j.txt 194 | Loong/data/doc/financial/2024-Free Flow, Inc.-j.txt 195 | Loong/data/doc/financial/2024-Future FinTech Group Inc.-j.txt 196 | Loong/data/doc/financial/2024-GameSquare Holdings, Inc.-j.txt 197 | Loong/data/doc/financial/2024-Gaucho Group Holdings, Inc.-j.txt 198 | Loong/data/doc/financial/2024-GBT Technologies Inc.-j.txt 199 | Loong/data/doc/financial/2024-GD Culture Group Ltd-j.txt 200 | Loong/data/doc/financial/2024-General Enterprise Ventures, Inc.-j.txt 201 | Loong/data/doc/financial/2024-GENERATION INCOME PROPERTIES, INC.-j.txt 202 | Loong/data/doc/financial/2024-Generations Bancorp 
NY, Inc.-j.txt 203 | Loong/data/doc/financial/2024-Genprex, Inc.-j.txt 204 | Loong/data/doc/financial/2024-GIVEMEPOWER CORP-j.txt 205 | Loong/data/doc/financial/2024-Global Clean Energy Holdings, Inc.-j.txt 206 | Loong/data/doc/financial/2024-Global Gas Corp-j.txt 207 | Loong/data/doc/financial/2024-Goal Acquisitions Corp.-j.txt 208 | Loong/data/doc/financial/2024-Gofba, Inc.-j.txt 209 | Loong/data/doc/financial/2024-GOLDENWELL BIOTECH, INC.-j.txt 210 | Loong/data/doc/financial/2024-Goodness Growth Holdings, Inc.-j.txt 211 | Loong/data/doc/financial/2024-GRANT PARK FUTURES FUND LIMITED PARTNERSHIP-j.txt 212 | Loong/data/doc/financial/2024-GRANT PARK FUTURES FUND LIMITED PARTNERSHIP-n.txt 213 | Loong/data/doc/financial/2024-Greater Cannabis Company, Inc.-j.txt 214 | Loong/data/doc/financial/2024-Greenbrook TMS Inc.-j.txt 215 | Loong/data/doc/financial/2024-Greenland Technologies Holding Corp.-j.txt 216 | Loong/data/doc/financial/2024-Greenwave Technology Solutions, Inc.-j.txt 217 | Loong/data/doc/financial/2024-Greenwich LifeSciences, Inc.-j.txt 218 | Loong/data/doc/financial/2024-GRESHAM WORLDWIDE, INC.-j.txt 219 | Loong/data/doc/financial/2024-GRIID Infrastructure Inc.-j.txt 220 | Loong/data/doc/financial/2024-Grom Social Enterprises, Inc.-j.txt 221 | Loong/data/doc/financial/2024-Gryphon Digital Mining, Inc.-j.txt 222 | Loong/data/doc/financial/2024-GSE SYSTEMS INC-j.txt 223 | Loong/data/doc/financial/2024-Hapi Metaverse Inc.-j.txt 224 | Loong/data/doc/financial/2024-HARTE HANKS INC-j.txt 225 | Loong/data/doc/financial/2024-HCW Biologics Inc.-j.txt 226 | Loong/data/doc/financial/2024-HealthLynked Corp-j.txt 227 | Loong/data/doc/financial/2024-HIGH WIRE NETWORKS, INC.-j.txt 228 | Loong/data/doc/financial/2024-HOUSTON AMERICAN ENERGY CORP-j.txt 229 | Loong/data/doc/financial/2024-HUGOTON ROYALTY TRUST-j.txt 230 | Loong/data/doc/financial/2024-iLearningEngines, Inc.-j.txt 231 | Loong/data/doc/financial/2024-ISUN, INC.-n.txt 232 | 
Loong/data/doc/financial/2024-Lovesac Co-n.txt 233 | Loong/data/doc/financial/2024-Nano Magic Inc.-n.txt 234 | Loong/data/doc/financial/2024-Quarta-Rad, Inc.-n.txt 235 | Loong/data/doc/financial/2024-Sequoia Mortgage Trust 2013-2-n.txt 236 | Loong/data/doc/financial/report_000001-2024-平安银行-2024年第一季度报告.txt 237 | Loong/data/doc/financial/report_000026-2024-飞亚达-2024年一季度报告.txt 238 | Loong/data/doc/financial/report_000049-2024-德赛电池-2024年一季度报告.txt 239 | Loong/data/doc/financial/report_000409-2024-云鼎科技-2024年一季度报告.txt 240 | Loong/data/doc/financial/report_000423-2015-东阿阿胶-2015年第三季度报告全文.txt 241 | Loong/data/doc/financial/report_000423-2015-东阿阿胶-2015年第一季度报告全文.txt 242 | Loong/data/doc/financial/report_000423-2016-东阿阿胶-2016年第三季度报告全文.txt 243 | Loong/data/doc/financial/report_000423-2016-东阿阿胶-2016年第一季度报告全文.txt 244 | Loong/data/doc/financial/report_000423-2017-东阿阿胶-2017年第三季度报告全文.txt 245 | Loong/data/doc/financial/report_000423-2017-东阿阿胶-2017年第一季度报告全文.txt 246 | Loong/data/doc/financial/report_000423-2018-东阿阿胶-2018年第三季度报告全文.txt 247 | Loong/data/doc/financial/report_000423-2019-东阿阿胶-2019年第一季度报告全文.txt 248 | Loong/data/doc/financial/report_000423-2020-东阿阿胶-2020年第一季度报告全文.txt 249 | Loong/data/doc/financial/report_000423-2021-东阿阿胶-2021年第一季度报告全文.txt 250 | Loong/data/doc/financial/report_000423-2022-东阿阿胶-2022年一季度报告.txt 251 | Loong/data/doc/financial/report_000423-2023-东阿阿胶-2023年一季度报告.txt 252 | Loong/data/doc/financial/report_000423-2024-东阿阿胶-2024年一季度报告.txt 253 | Loong/data/doc/financial/report_000488-2024-晨鸣纸业-2024年一季度报告.txt 254 | Loong/data/doc/financial/report_000498-2024-山东路桥-2024年一季度报告.txt 255 | Loong/data/doc/financial/report_000536-2024-华映科技-2024年一季度报告.txt 256 | Loong/data/doc/financial/report_000554-2024-泰山石油-2024年一季度报告.txt 257 | Loong/data/doc/financial/report_000567-2024-海德股份-2024年一季度报告.txt 258 | Loong/data/doc/financial/report_000607-2024-华媒控股-2024年一季度报告.txt 259 | Loong/data/doc/financial/report_000631-2024-顺发恒业-2024年一季度报告.txt 260 | 
Loong/data/doc/financial/report_000635-2024-英力特-2024年一季度报告.txt 261 | Loong/data/doc/financial/report_000636-2024-风华高科-2024年一季度报告.txt 262 | Loong/data/doc/financial/report_000651-2015-格力电器-2015年第三季度报告全文.txt 263 | Loong/data/doc/financial/report_000651-2016-格力电器-2016年第三季度报告全文.txt 264 | Loong/data/doc/financial/report_000651-2017-格力电器-2017年第三季度报告全文.txt 265 | Loong/data/doc/financial/report_000651-2017-格力电器-2017年第一季度报告全文.txt 266 | Loong/data/doc/financial/report_000651-2018-格力电器-2018年第三季度报告全文.txt 267 | Loong/data/doc/financial/report_000651-2019-格力电器-2019年第一季度报告全文.txt 268 | Loong/data/doc/financial/report_000651-2020-格力电器-2020年第一季度报告全文.txt 269 | Loong/data/doc/financial/report_000651-2021-格力电器-2021年第一季度报告全文.txt 270 | Loong/data/doc/financial/report_000651-2022-格力电器-2022年一季度报告.txt 271 | Loong/data/doc/financial/report_000651-2023-格力电器-2023年一季度报告.txt 272 | Loong/data/doc/financial/report_000651-2024-格力电器-2024年一季度报告.txt 273 | Loong/data/doc/financial/report_000702-2024-正虹科技-2024年一季度报告.txt 274 | Loong/data/doc/financial/report_000819-2024-岳阳兴长-2024年一季度报告.txt 275 | Loong/data/doc/financial/report_000848-2024-承德露露-2024年一季度报告.txt 276 | Loong/data/doc/financial/report_000863-2024-三湘印象-2024年一季度报告.txt 277 | Loong/data/doc/financial/report_000888-2024-峨眉山A-2024年一季度报告.txt 278 | Loong/data/doc/financial/report_000906-2024-浙商中拓-2024年一季度报告.txt 279 | Loong/data/doc/financial/report_000921-2024-海信家电-2024年一季度报告.txt 280 | Loong/data/doc/financial/report_000933-2024-神火股份-2024年一季度报告.txt 281 | Loong/data/doc/financial/report_000959-2024-首钢股份-2024年一季度报告.txt 282 | Loong/data/doc/financial/report_000972-2024-中基健康-2024年一季度报告.txt 283 | Loong/data/doc/financial/report_000989-2024-九芝堂-2024年一季度报告.txt 284 | Loong/data/doc/financial/report_001203-2024-大中矿业-2024年一季度报告.txt 285 | Loong/data/doc/financial/report_001207-2024-联科科技-2024年一季度报告.txt 286 | Loong/data/doc/financial/report_001223-2024-欧克科技-2024年一季度报告.txt 287 | Loong/data/doc/financial/report_001306-2024-夏厦精密-2024年一季度报告.txt 288 | 
Loong/data/doc/financial/report_001322-2024-箭牌家居-2024年一季度报告.txt 289 | Loong/data/doc/financial/report_001360-2024-南矿集团-2024年一季度报告.txt 290 | Loong/data/doc/financial/report_001368-2024-通达创智-2024年一季度报告.txt 291 | Loong/data/doc/financial/report_001378-2024-德冠新材-2024年一季度报告.txt 292 | Loong/data/doc/financial/report_001387-2024-雪祺电气-2024年一季度报告.txt 293 | Loong/data/doc/financial/report_002011-2024-盾安环境-2024年一季度报告.txt 294 | Loong/data/doc/financial/report_002019-2024-亿帆医药-2024年一季度报告.txt 295 | Loong/data/doc/financial/report_002020-2024-京新药业-2024年一季度报告.txt 296 | Loong/data/doc/financial/report_002023-2024-海特高新-2024年一季度报告.txt 297 | Loong/data/doc/financial/report_002026-2024-山东威达-2024年一季度报告.txt 298 | Loong/data/doc/financial/report_002033-2024-丽江股份-2024年一季度报告.txt 299 | Loong/data/doc/financial/report_002065-2024-东华软件-2024年一季度报告.txt 300 | Loong/data/doc/financial/report_002090-2024-金智科技-2024年一季度报告.txt 301 | Loong/data/doc/financial/report_002093-2024-国脉科技-2024年一季度报告.txt 302 | Loong/data/doc/financial/report_002138-2024-顺络电子-2024年一季度报告.txt 303 | Loong/data/doc/financial/report_002167-2024-东方锆业-2024年一季度报告.txt 304 | Loong/data/doc/financial/report_002186-2024-全聚德-2024年一季度报告.txt 305 | Loong/data/doc/financial/report_002194-2024-武汉凡谷-2024年一季度报告.txt 306 | Loong/data/doc/financial/report_002230-2024-科大讯飞-2024年一季度报告.txt 307 | Loong/data/doc/financial/report_002242-2024-九阳股份-2024年一季度报告.txt 308 | Loong/data/doc/financial/report_002245-2024-蔚蓝锂芯-2024年一季度报告.txt 309 | Loong/data/doc/financial/report_002283-2024-天润工业-2024年一季度报告.txt 310 | Loong/data/doc/financial/report_002285-2024-世联行-2024年一季度报告.txt 311 | Loong/data/doc/financial/report_002304-2015-洋河股份-2015年第三季度报告全文.txt 312 | Loong/data/doc/financial/report_002304-2015-洋河股份-2015年第一季度报告全文.txt 313 | Loong/data/doc/financial/report_002304-2016-洋河股份-2016年第三季度报告全文.txt 314 | Loong/data/doc/financial/report_002304-2017-洋河股份-2017年第三季度报告全文.txt 315 | Loong/data/doc/financial/report_002304-2017-洋河股份-2017年第一季度报告全文.txt 316 | 
Loong/data/doc/financial/report_002304-2018-洋河股份-2018年第三季度报告全文.txt 317 | Loong/data/doc/financial/report_002304-2018-洋河股份-2018年第一季度报告全文.txt 318 | Loong/data/doc/financial/report_002304-2019-洋河股份-2019年第一季度报告全文.txt 319 | Loong/data/doc/financial/report_002304-2020-洋河股份-2020年第一季度报告全文.txt 320 | Loong/data/doc/financial/report_002304-2021-洋河股份-2021年第一季度报告全文.txt 321 | Loong/data/doc/financial/report_002304-2022-洋河股份-2022年一季度报告.txt 322 | Loong/data/doc/financial/report_002304-2023-洋河股份-2023年一季度报告.txt 323 | Loong/data/doc/financial/report_002304-2024-洋河股份-2024年一季度报告.txt 324 | Loong/data/doc/financial/report_002312-2024-川发龙蟒-2024年一季度报告.txt 325 | Loong/data/doc/financial/report_002317-2024-众生药业-2024年一季度报告.txt 326 | Loong/data/doc/financial/report_002384-2024-东山精密-2024年一季度报告.txt 327 | Loong/data/doc/financial/report_002391-2024-长青股份-2024年一季度报告.txt 328 | Loong/data/doc/financial/report_002415-2024-海康威视-2024年第一季度报告.txt 329 | Loong/data/doc/financial/report_002461-2024-珠江啤酒-2024年一季度报告.txt 330 | Loong/data/doc/financial/report_002463-2024-沪电股份-2024年一季度报告.txt 331 | Loong/data/doc/financial/report_002468-2024-申通快递-2024年一季度报告.txt 332 | Loong/data/doc/financial/report_002519-2024-银河电子-2024年一季度报告.txt 333 | Loong/data/doc/financial/report_002537-2024-海联金汇-2024年一季度报告.txt 334 | Loong/data/doc/financial/report_002588-2024-史丹利-2024年一季度报告.txt 335 | Loong/data/doc/financial/report_002591-2024-恒大高新-2024年一季度报告.txt 336 | Loong/data/doc/financial/report_002594-2015-比亚迪-2015年第三季度报告全文.txt 337 | Loong/data/doc/financial/report_002594-2016-比亚迪-2016年第三季度报告全文.txt 338 | Loong/data/doc/financial/report_002594-2017-比亚迪-2017年第三季度报告全文.txt 339 | Loong/data/doc/financial/report_002594-2018-比亚迪-2018年第三季度报告全文.txt 340 | Loong/data/doc/financial/report_002594-2019-比亚迪-2019年第一季度报告全文.txt 341 | Loong/data/doc/financial/report_002594-2020-比亚迪-2020年第一季度报告全文.txt 342 | Loong/data/doc/financial/report_002594-2021-比亚迪-2021年第一季度报告全文.txt 343 | Loong/data/doc/financial/report_002594-2022-比亚迪-2022年一季度报告.txt 344 | 
Loong/data/doc/financial/report_002594-2023-比亚迪-2023年一季度报告.txt 345 | Loong/data/doc/financial/report_002594-2024-比亚迪-2024年一季度报告.txt 346 | Loong/data/doc/financial/report_002606-2024-大连电瓷-2024年一季度报告.txt 347 | Loong/data/doc/financial/report_002664-2024-信质集团-2024年一季度报告.txt 348 | Loong/data/doc/financial/report_002676-2024-顺威股份-2024年一季度报告.txt 349 | Loong/data/doc/financial/report_002681-2024-奋达科技-2024年一季度报告.txt 350 | Loong/data/doc/financial/report_002683-2024-广东宏大-2024年一季度报告.txt 351 | Loong/data/doc/financial/report_002726-2024-龙大美食-2024年一季度报告.txt 352 | Loong/data/doc/financial/report_002762-2024-金发拉比-2024年一季度报告.txt 353 | Loong/data/doc/financial/report_002767-2024-先锋电子-2024年一季度报告.txt 354 | Loong/data/doc/financial/report_002773-2024-康弘药业-2024年一季度报告.txt 355 | Loong/data/doc/financial/report_002791-2024-坚朗五金-2024年一季度报告.txt 356 | Loong/data/doc/financial/report_002812-2024-恩捷股份-2024年一季度报告.txt 357 | Loong/data/doc/financial/report_002843-2024-泰嘉股份-2023年第一季度报告修订版.txt 358 | Loong/data/doc/financial/report_002859-2024-洁美科技-2024年一季度报告.txt 359 | Loong/data/doc/financial/report_002881-2024-美格智能-2024年一季度报告.txt 360 | Loong/data/doc/financial/report_002891-2024-中宠股份-2024年一季度报告.txt 361 | Loong/data/doc/financial/report_002928-2024-华夏航空-2024年一季度报告.txt 362 | Loong/data/doc/financial/report_002938-2024-鹏鼎控股-2024年一季度报告.txt 363 | Loong/data/doc/financial/report_002970-2024-锐明技术-2024年一季度报告.txt 364 | Loong/data/doc/financial/report_002996-2024-顺博合金-2024年一季度报告.txt 365 | Loong/data/doc/financial/report_002997-2024-瑞鹄模具-2024年一季度报告.txt 366 | Loong/data/doc/financial/report_003006-2024-百亚股份-2024年一季度报告.txt 367 | Loong/data/doc/financial/report_003816-2024-中国广核-2024年第一季度报告.txt 368 | Loong/data/doc/financial/report_300004-2024-南风股份-2024年一季度报告.txt 369 | Loong/data/doc/financial/report_300014-2024-亿纬锂能-2024年一季度报告.txt 370 | Loong/data/doc/financial/report_300016-2024-北陆药业-2024年一季度报告.txt 371 | Loong/data/doc/financial/report_300069-2024-金利华电-2024年一季度报告.txt 372 | 
Loong/data/doc/financial/report_300070-2024-碧水源-2024年一季度报告.txt 373 | Loong/data/doc/financial/report_300072-2024-海新能科-2024年一季度报告.txt 374 | Loong/data/doc/financial/report_300079-2024-数码视讯-2024年一季度报告.txt 375 | Loong/data/doc/financial/report_300091-2024-金通灵-2024年一季度报告.txt 376 | Loong/data/doc/financial/report_300092-2024-科新机电-2024年一季度报告.txt 377 | Loong/data/doc/financial/report_300115-2024-长盈精密-2024年一季度报告.txt 378 | Loong/data/doc/financial/report_300126-2024-锐奇股份-2024年一季度报告.txt 379 | Loong/data/doc/financial/report_300133-2024-华策影视-2024年一季度报告.txt 380 | Loong/data/doc/financial/report_300134-2024-大富科技-2024年一季度报告.txt 381 | Loong/data/doc/financial/report_300136-2024-信维通信-2024年一季度报告.txt 382 | Loong/data/doc/financial/report_300140-2024-节能环境-2024年一季度报告.txt 383 | Loong/data/doc/financial/report_300142-2024-沃森生物-2024年一季度报告.txt 384 | Loong/data/doc/financial/report_300146-2015-汤臣倍健-2015年第三季度报告全文.txt 385 | Loong/data/doc/financial/report_300146-2016-汤臣倍健-2016年第三季度报告全文.txt 386 | Loong/data/doc/financial/report_300146-2017-汤臣倍健-2017年第三季度报告全文.txt 387 | Loong/data/doc/financial/report_300146-2019-汤臣倍健-2019年第一季度报告全文.txt 388 | Loong/data/doc/financial/report_300146-2021-汤臣倍健-2021年第一季度报告全文.txt 389 | Loong/data/doc/financial/report_300146-2022-汤臣倍健-2022年一季度报告.txt 390 | Loong/data/doc/financial/report_300146-2023-汤臣倍健-2023年一季度报告.txt 391 | Loong/data/doc/financial/report_300146-2024-汤臣倍健-2024年一季度报告.txt 392 | Loong/data/doc/financial/report_300160-2024-秀强股份-2024年一季度报告.txt 393 | Loong/data/doc/financial/report_300162-2024-雷曼光电-2024年一季度报告.txt 394 | Loong/data/doc/financial/report_300174-2024-元力股份-2024年一季度报告.txt 395 | Loong/data/doc/financial/report_300179-2024-四方达-2024年一季度报告.txt 396 | Loong/data/doc/financial/report_300180-2024-华峰超纤-2024年一季度报告.txt 397 | Loong/data/doc/financial/report_300212-2024-易华录-2024年一季度报告.txt 398 | Loong/data/doc/financial/report_300221-2024-银禧科技-2024年一季度报告.txt 399 | Loong/data/doc/financial/report_300223-2024-北京君正-2024年一季度报告.txt 400 | 
Loong/data/doc/financial/report_300232-2024-洲明科技-2024年一季度报告.txt 401 | Loong/data/doc/financial/report_300239-2024-东宝生物-2024年一季度报告.txt 402 | Loong/data/doc/financial/report_300249-2024-依米康-2024年一季度报告.txt 403 | Loong/data/doc/financial/report_300263-2024-隆华科技-2024年一季度报告.txt 404 | Loong/data/doc/financial/report_300275-2024-梅安森-2024年一季度报告.txt 405 | Loong/data/doc/financial/report_300278-2024-华昌达-2024年一季度报告.txt 406 | Loong/data/doc/financial/report_300284-2024-苏交科-2024年一季度报告.txt 407 | Loong/data/doc/financial/report_300292-2024-吴通控股-2024年一季度报告.txt 408 | Loong/data/doc/financial/report_300299-2024-富春股份-2024年一季度报告.txt 409 | Loong/data/doc/financial/report_300300-2024-海峡创新-2024年一季度报告.txt 410 | Loong/data/doc/financial/report_300314-2024-戴维医疗-2024年一季度报告.txt 411 | Loong/data/doc/financial/report_300321-2024-同大股份-2024年一季度报告.txt 412 | Loong/data/doc/financial/report_300337-2024-银邦股份-2024年一季度报告.txt 413 | Loong/data/doc/financial/report_300349-2024-金卡智能-2024年一季度报告.txt 414 | Loong/data/doc/financial/report_300354-2024-东华测试-2024年一季度报告.txt 415 | Loong/data/doc/financial/report_300358-2024-楚天科技-2024年一季度报告.txt 416 | Loong/data/doc/financial/report_300375-2024-鹏翎股份-2024年一季度报告.txt 417 | Loong/data/doc/financial/report_300390-2024-天华新能-2024年一季度报告.txt 418 | Loong/data/doc/financial/report_300393-2024-中来股份-2024年一季度报告.txt 419 | Loong/data/doc/financial/report_300394-2024-天孚通信-2024年一季度报告.txt 420 | Loong/data/doc/financial/report_300396-2024-迪瑞医疗-2024年一季度报告.txt 421 | Loong/data/doc/financial/report_300404-2024-博济医药-2024年一季度报告.txt 422 | Loong/data/doc/financial/report_300413-2024-芒果超媒-2024年一季度报告.txt 423 | Loong/data/doc/financial/report_300414-2024-中光防雷-2024年一季度报告.txt 424 | Loong/data/doc/financial/report_300439-2024-美康生物-2024年一季度报告.txt 425 | Loong/data/doc/financial/report_300446-2024-航天智造-2024年一季度报告.txt 426 | Loong/data/doc/financial/report_300462-2024-华铭智能-2024年一季度报告.txt 427 | Loong/data/doc/financial/report_300463-2024-迈克生物-2024年一季度报告.txt 428 | 
Loong/data/doc/financial/report_300467-2024-迅游科技-2024年一季度报告.txt 429 | Loong/data/doc/financial/report_300475-2024-香农芯创-2024年一季度报告.txt 430 | Loong/data/doc/financial/report_300478-2024-杭州高新-2024年一季度报告.txt 431 | Loong/data/doc/financial/report_300487-2024-蓝晓科技-2024年一季度报告.txt 432 | Loong/data/doc/financial/report_300496-2024-中科创达-2024年一季度报告.txt 433 | Loong/data/doc/financial/report_300502-2024-新易盛-2024年一季度报告.txt 434 | Loong/data/doc/financial/report_300525-2024-博思软件-2024年一季度报告.txt 435 | Loong/data/doc/financial/report_300540-2024-蜀道装备-2024年一季度报告.txt 436 | Loong/data/doc/financial/report_300541-2024-先进数通-2024年一季度报告.txt 437 | Loong/data/doc/financial/report_300550-2024-和仁科技-2024年一季度报告.txt 438 | Loong/data/doc/financial/report_300580-2024-贝斯特-2024年一季度报告.txt 439 | Loong/data/doc/financial/report_300581-2024-晨曦航空-2024年一季度报告.txt 440 | Loong/data/doc/financial/report_300584-2024-海辰药业-2024年一季度报告.txt 441 | Loong/data/doc/financial/report_300589-2024-江龙船艇-2024年一季度报告.txt 442 | Loong/data/doc/financial/report_300590-2024-移为通信-2024年一季度报告.txt 443 | Loong/data/doc/financial/report_300592-2024-华凯易佰-2024年一季度报告.txt 444 | Loong/data/doc/financial/report_300607-2024-拓斯达-2024年一季度报告.txt 445 | Loong/data/doc/financial/report_300608-2024-思特奇-2024年一季度报告.txt 446 | Loong/data/doc/financial/report_300617-2024-安靠智电-2024年一季度报告.txt 447 | Loong/data/doc/financial/report_300621-2024-维业股份-2024年一季度报告.txt 448 | Loong/data/doc/financial/report_300644-2024-南京聚隆-2024年一季度报告.txt 449 | Loong/data/doc/financial/report_300672-2024-国科微-2024年一季度报告.txt 450 | Loong/data/doc/financial/report_300673-2024-佩蒂股份-2024年一季度报告.txt 451 | Loong/data/doc/financial/report_300678-2024-中科信息-2024年一季度报告.txt 452 | Loong/data/doc/financial/report_300679-2024-电连技术-2024年一季度报告.txt 453 | Loong/data/doc/financial/report_300683-2024-海特生物-2024年一季度报告.txt 454 | Loong/data/doc/financial/report_300710-2024-万隆光电-2024年一季度报告.txt 455 | Loong/data/doc/financial/report_300725-2024-药石科技-2024年一季度报告.txt 456 | 
Loong/data/doc/financial/report_300741-2024-华宝股份-2024年一季度报告.txt 457 | Loong/data/doc/financial/report_300743-2024-天地数码-2024年一季度报告.txt 458 | Loong/data/doc/financial/report_300749-2024-顶固集创-2024年一季度报告.txt 459 | Loong/data/doc/financial/report_300761-2024-立华股份-2024年一季度报告.txt 460 | Loong/data/doc/financial/report_300768-2024-迪普科技-2024年一季度报告.txt 461 | Loong/data/doc/financial/report_300783-2024-三只松鼠-2024年一季度报告.txt 462 | Loong/data/doc/financial/report_300800-2024-力合科技-2024年一季度报告.txt 463 | Loong/data/doc/financial/report_300801-2024-泰和科技-2024年一季度报告.txt 464 | Loong/data/doc/financial/report_300822-2024-贝仕达克-2024年一季度报告.txt 465 | Loong/data/doc/financial/report_300840-2024-酷特智能-2024年一季度报告.txt 466 | Loong/data/doc/financial/report_300848-2024-美瑞新材-2024年一季度报告.txt 467 | Loong/data/doc/financial/report_300855-2024-图南股份-2024年一季度报告.txt 468 | Loong/data/doc/financial/report_300858-2024-科拓生物-2024年一季度报告.txt 469 | Loong/data/doc/financial/report_300862-2024-蓝盾光电-2024年一季度报告.txt 470 | Loong/data/doc/financial/report_300867-2024-圣元环保-2024年一季度报告.txt 471 | Loong/data/doc/financial/report_300882-2024-万胜智能-2024年一季度报告.txt 472 | Loong/data/doc/financial/report_300887-2024-谱尼测试-2024年一季度报告.txt 473 | Loong/data/doc/financial/report_300896-2024-爱美客-2024年一季度报告.txt 474 | Loong/data/doc/financial/report_300898-2024-熊猫乳品-2024年一季度报告.txt 475 | Loong/data/doc/financial/report_300900-2024-广联航空-2024年一季度报告.txt 476 | Loong/data/doc/financial/report_300908-2024-仲景食品-2024年一季度报告.txt 477 | Loong/data/doc/financial/report_300916-2024-朗特智能-2024年一季度报告.txt 478 | Loong/data/doc/financial/report_300917-2024-特发服务-2024年一季度报告.txt 479 | Loong/data/doc/financial/report_300923-2024-研奥股份-2024年一季度报告.txt 480 | Loong/data/doc/financial/report_300939-2024-秋田微-2024年一季度报告.txt 481 | Loong/data/doc/financial/report_300978-2024-东箭科技-2024年一季度报告.txt 482 | Loong/data/doc/financial/report_300990-2024-同飞股份-2024年一季度报告.txt 483 | Loong/data/doc/financial/report_300991-2024-创益通-2024年一季度报告.txt 484 | 
Loong/data/doc/financial/report_301000-2024-肇民科技-2024年一季度报告.txt 485 | Loong/data/doc/financial/report_301003-2024-江苏博云-2024年一季度报告.txt 486 | Loong/data/doc/financial/report_301022-2024-海泰科-2024年一季度报告.txt 487 | Loong/data/doc/financial/report_301046-2024-能辉科技-2024年一季度报告.txt 488 | Loong/data/doc/financial/report_301050-2024-雷电微力-2024年一季度报告.txt 489 | Loong/data/doc/financial/report_301058-2024-中粮科工-2024年一季度报告.txt 490 | Loong/data/doc/financial/report_301070-2024-开勒股份-2024年一季度报告.txt 491 | Loong/data/doc/financial/report_301075-2024-多瑞医药-2024年一季度报告.txt 492 | Loong/data/doc/financial/report_301099-2024-雅创电子-2024年一季度报告.txt 493 | Loong/data/doc/financial/report_301117-2024-佳缘科技-2024年一季度报告.txt 494 | Loong/data/doc/financial/report_301120-2024-新特电气-2024年一季度报告.txt 495 | Loong/data/doc/financial/report_301121-2024-紫建电子-2024年一季度报告.txt 496 | Loong/data/doc/financial/report_301129-2024-瑞纳智能-2024年一季度报告.txt 497 | Loong/data/doc/financial/report_301150-2024-中一科技-2024年一季度报告.txt 498 | Loong/data/doc/financial/report_301156-2024-美农生物-2024年一季度报告.txt 499 | Loong/data/doc/financial/report_301157-2024-华塑科技-2024年一季度报告.txt 500 | Loong/data/doc/financial/report_301200-2024-大族数控-2024年一季度报告.txt 501 | Loong/data/doc/financial/report_301219-2024-腾远钴业-2024年一季度报告.txt 502 | Loong/data/doc/financial/report_301246-2024-宏源药业-2024年一季度报告.txt 503 | Loong/data/doc/financial/report_301282-2024-金禄电子-2024年一季度报告.txt 504 | Loong/data/doc/financial/report_301283-2024-聚胶股份-2024年一季度报告.txt 505 | Loong/data/doc/financial/report_301285-2024-鸿日达-2024年一季度报告.txt 506 | Loong/data/doc/financial/report_301288-2024-清研环境-2024年一季度报告.txt 507 | Loong/data/doc/financial/report_301311-2024-昆船智能-2024年一季度报告.txt 508 | Loong/data/doc/financial/report_301318-2024-维海德-2024年一季度报告.txt 509 | Loong/data/doc/financial/report_301325-2024-曼恩斯特-2024年一季度报告.txt 510 | Loong/data/doc/financial/report_301345-2024-涛涛车业-2024年第一季度报告修订版.txt 511 | Loong/data/doc/financial/report_301357-2024-北方长龙-2024年一季度报告.txt 512 | 
Loong/data/doc/financial/report_301368-2024-丰立智能-2024年一季度报告.txt 513 | Loong/data/doc/financial/report_301370-2024-国科恒泰-2024年一季度报告.txt 514 | Loong/data/doc/financial/report_301376-2024-致欧科技-2024年一季度报告.txt 515 | Loong/data/doc/financial/report_301383-2024-天键股份-2024年一季度报告.txt 516 | Loong/data/doc/financial/report_301388-2024-欣灵电气-2024年一季度报告.txt 517 | Loong/data/doc/financial/report_301391-2024-卡莱特-2024年一季度报告.txt 518 | Loong/data/doc/financial/report_301419-2024-阿莱德-2024年一季度报告.txt 519 | Loong/data/doc/financial/report_301469-2024-恒达新材-2024年一季度报告.txt 520 | Loong/data/doc/financial/report_301507-2024-民生健康-2024年一季度报告.txt 521 | Loong/data/doc/financial/report_301517-2024-陕西华达-2024年一季度报告.txt 522 | Loong/data/doc/financial/report_301550-2024-斯菱股份-2024年一季度报告.txt 523 | Loong/data/doc/financial/report_301555-2024-惠柏新材-2024年一季度报告.txt 524 | Loong/data/doc/financial/report_301589-2024-诺瓦星云-2024年一季度报告.txt 525 | Loong/data/doc/financial/report_600123-2024-兰花科创-兰花科创2024年第一季度报告.txt 526 | Loong/data/doc/financial/report_600232-2024-金鹰股份-金鹰股份2024年第一季度报告.txt 527 | Loong/data/doc/financial/report_600282-2024-南钢股份-南京钢铁股份有限公司2024年第一季度报告.txt 528 | Loong/data/doc/financial/report_600310-2024-广西能源-广西能源股份有限公司2024年第一季度报告.txt 529 | Loong/data/doc/financial/report_600396-2024-ST金山-2024年第一季度报告.txt 530 | Loong/data/doc/financial/report_600410-2024-华胜天成-2024年第一季度报告.txt 531 | Loong/data/doc/financial/report_600436-2024-片仔癀-漳州片仔癀药业股份有限公司2024年第一季度报告.txt 532 | Loong/data/doc/financial/report_600449-2024-宁夏建材-宁夏建材2024年第一季度报告.txt 533 | Loong/data/doc/financial/report_600673-2024-东阳光-东阳光2024年第一季度报告.txt 534 | Loong/data/doc/financial/report_600717-2024-天津港-天津港股份有限公司2024年第一季度报告.txt 535 | Loong/data/doc/financial/report_600740-2024-山西焦化-山西焦化股份有限公司2024年第一季度报告.txt 536 | Loong/data/doc/financial/report_600745-2024-闻泰科技-2024年第一季度报告.txt 537 | Loong/data/doc/financial/report_600746-2024-江苏索普-江苏索普2024年第一季度报告.txt 538 | Loong/data/doc/financial/report_600866-2024-星湖科技-2024年一季度报告.txt 539 | 
Loong/data/doc/financial/report_600885-2024-宏发股份-宏发股份2024年第一季度报告.txt 540 | Loong/data/doc/financial/report_600955-2024-维远股份-利华益维远化学股份有限公司2024年第一季度报告.txt 541 | Loong/data/doc/financial/report_600980-2024-北矿科技-北矿科技2024年第一季度报告.txt 542 | Loong/data/doc/financial/report_601100-2024-恒立液压-江苏恒立液压股份有限公司2024年第一季度报告.txt 543 | Loong/data/doc/financial/report_601101-2024-昊华能源-北京昊华能源股份有限公司2024年第一季度报告.txt 544 | Loong/data/doc/financial/report_601318-2024-中国平安-平安银行股份有限公司2024年第一季度报告.txt 545 | Loong/data/doc/financial/report_601633-2024-长城汽车-长城汽车股份有限公司2024年第一季度报告.txt 546 | Loong/data/doc/financial/report_601975-2024-招商南油-招商南油2024年第一季度报告.txt 547 | Loong/data/doc/financial/report_603015-2024-弘讯科技-2024年一季度报告.txt 548 | Loong/data/doc/financial/report_603059-2024-倍加洁-倍加洁集团股份有限公司2024年第一季度报告.txt 549 | Loong/data/doc/financial/report_603062-2024-麦加芯彩-公司2024年第一季度报告.txt 550 | Loong/data/doc/financial/report_603081-2024-大丰实业-浙江大丰实业股份有限公司2024年第一季度报告.txt 551 | Loong/data/doc/financial/report_603093-2024-南华期货-南华期货股份有限公司2024年第一季度报告.txt 552 | Loong/data/doc/financial/report_603097-2024-江苏华辰-江苏华辰2024年第一季度报告.txt 553 | Loong/data/doc/financial/report_603132-2024-金徽股份-金徽股份2024年第一季度报告.txt 554 | Loong/data/doc/financial/report_603156-2024-养元饮品-养元饮品2024年第一季度报告.txt 555 | Loong/data/doc/financial/report_603206-2024-嘉环科技-2024年第一季度报告.txt 556 | Loong/data/doc/financial/report_603298-2024-杭叉集团-杭叉集团2024年第一季度报告.txt 557 | Loong/data/doc/financial/report_603309-2024-维力医疗-维力医疗2024年第一季度报告.txt 558 | Loong/data/doc/financial/report_603505-2024-金石资源-金石资源集团股份有限公司2024年第一季度报告.txt 559 | Loong/data/doc/financial/report_603558-2024-健盛集团-健盛集团2024年第一季度报告.txt 560 | Loong/data/doc/financial/report_603585-2024-苏利股份-苏利股份2024年第一季度报告.txt 561 | Loong/data/doc/financial/report_603629-2024-利通电子-利通电子2024年第一季度报告.txt 562 | Loong/data/doc/financial/report_603679-2024-华体科技-四川华体照明科技股份有限公司2024年第一季度报告.txt 563 | Loong/data/doc/financial/report_603713-2024-密尔克卫-密尔克卫智能供应链服务集团股份有限公司2024年第一季度报告.txt 564 | 
Loong/data/doc/financial/report_603737-2024-三棵树-2024年第一季度报告.txt 565 | Loong/data/doc/financial/report_603799-2024-华友钴业-华友钴业2024年第一季度报告.txt 566 | Loong/data/doc/financial/report_603919-2024-金徽酒-金徽酒股份有限公司2024年第一季度报告.txt 567 | Loong/data/doc/financial/report_603920-2024-世运电路-世运电路2024年第一季度报告.txt 568 | Loong/data/doc/financial/report_603978-2024-深圳新星-2024年第一季度报告.txt 569 | Loong/data/doc/financial/report_603982-2024-泉峰汽车-2024年第一季度报告.txt 570 | Loong/data/doc/financial/report_605300-2024-佳禾食品-佳禾食品工业股份有限公司2024年一季报.txt 571 | Loong/data/doc/financial/report_605389-2024-长龄液压-江苏长龄液压股份有限公司2024年一季度报告.txt 572 | Loong/data/doc/financial/report_688084-2024-晶品特装-2024年第一季度报告.txt 573 | Loong/data/doc/financial/report_688096-2024-京源环保-2024年第一季度报告.txt 574 | Loong/data/doc/financial/report_688111-2024-金山办公-金山办公2024年第一季度报告.txt 575 | Loong/data/doc/financial/report_688143-2024-长盈通-武汉长盈通光电技术股份有限公司2024年第一季度报告.txt 576 | Loong/data/doc/financial/report_688267-2024-中触媒-中触媒新材料股份有限公司2024年第一季度报告.txt 577 | Loong/data/doc/financial/report_688278-2024-特宝生物-特宝生物2024年第一季度报告.txt 578 | Loong/data/doc/financial/report_688316-2024-青云科技-2024年第一季度报告.txt 579 | Loong/data/doc/financial/report_688408-2024-中信博-中信博2024年第一季度报告.txt 580 | Loong/data/doc/financial/report_688510-2024-航亚科技-无锡航亚科技股份有限公司2024年第一季度报告.txt 581 | Loong/data/doc/financial/report_688516-2024-奥特维-无锡奥特维科技股份有限公司2024年第一季度报告.txt 582 | Loong/data/doc/financial/report_688579-2024-山大地纬-山大地纬2023年第一季度报告修订版.txt 583 | Loong/data/doc/financial/report_688626-2024-翔宇医疗-翔宇医疗2024年第一季度报告.txt 584 | Loong/data/doc/financial/report_688628-2024-优利德-2024年第一季度报告.txt 585 | Loong/data/doc/financial/report_688630-2024-芯碁微装-2024年第一季度报告.txt 586 | Loong/data/doc/financial/report_688676-2024-金盘科技-2024年第一季度报告.txt 587 | Loong/data/doc/financial/report_688682-2024-霍莱沃-2024年第一季度报告.txt 588 | Loong/data/doc/financial/report_688722-2024-同益中-同益中2024年第一季度报告.txt 589 | Loong/data/doc/financial/report_688793-2024-倍轻松-2024年第一季度报告.txt 590 | Loong/data/doc/legal/legal.json 591 | 
Loong/data/doc/paper/1508.01745.md 592 | Loong/data/doc/paper/1604.05280.md 593 | Loong/data/doc/paper/1604.05377.md 594 | Loong/data/doc/paper/1605.00252.md 595 | Loong/data/doc/paper/1610.04325.md 596 | Loong/data/doc/paper/1612.04662.md 597 | Loong/data/doc/paper/1709.03082.md 598 | Loong/data/doc/paper/1802.03426.md 599 | Loong/data/doc/paper/1802.08129.md 600 | Loong/data/doc/paper/1803.08375.md 601 | Loong/data/doc/paper/1804.04272.md 602 | Loong/data/doc/paper/1901.02039.md 603 | Loong/data/doc/paper/1901.05353.md 604 | Loong/data/doc/paper/1902.05715.md 605 | Loong/data/doc/paper/1902.07958.md 606 | Loong/data/doc/paper/1906.00341.md 607 | Loong/data/doc/paper/1907.02189.md 608 | Loong/data/doc/paper/1908.03825.md 609 | Loong/data/doc/paper/1909.04715.md 610 | Loong/data/doc/paper/1909.11114.md 611 | Loong/data/doc/paper/1909.11942.md 612 | Loong/data/doc/paper/1910.01161.md 613 | Loong/data/doc/paper/1910.10434.md 614 | Loong/data/doc/paper/1910.10683.md 615 | Loong/data/doc/paper/1911.00837.md 616 | Loong/data/doc/paper/1911.07205.md 617 | Loong/data/doc/paper/2001.04296.md 618 | Loong/data/doc/paper/2001.09186.md 619 | Loong/data/doc/paper/2002.03754.md 620 | Loong/data/doc/paper/2003.10555.md 621 | Loong/data/doc/paper/2004.08249.md 622 | Loong/data/doc/paper/2004.15015.md 623 | Loong/data/doc/paper/2005.10084.md 624 | Loong/data/doc/paper/2005.12592.md 625 | Loong/data/doc/paper/2007.01560.md 626 | Loong/data/doc/paper/2007.06048.md 627 | Loong/data/doc/paper/2008.07772.md 628 | Loong/data/doc/paper/2009.02235.md 629 | Loong/data/doc/paper/2010.00453.md 630 | Loong/data/doc/paper/2011.09533.md 631 | Loong/data/doc/paper/2012.04857.md 632 | Loong/data/doc/paper/2101.03049.md 633 | Loong/data/doc/paper/2103.01955.md 634 | Loong/data/doc/paper/2103.05149.md 635 | Loong/data/doc/paper/2103.11955.md 636 | Loong/data/doc/paper/2104.13209.md 637 | Loong/data/doc/paper/2105.01051.md 638 | Loong/data/doc/paper/2105.14550.md 639 | 
Loong/data/doc/paper/2106.04624.md 640 | Loong/data/doc/paper/2106.07447.md 641 | Loong/data/doc/paper/2107.01384.md 642 | Loong/data/doc/paper/2107.04700.md 643 | Loong/data/doc/paper/2108.08612.md 644 | Loong/data/doc/paper/2109.00666.md 645 | Loong/data/doc/paper/2109.09193.md 646 | Loong/data/doc/paper/2109.11251.md 647 | Loong/data/doc/paper/2111.14641.md 648 | Loong/data/doc/paper/2112.04359.md 649 | Loong/data/doc/paper/2112.09761.md 650 | Loong/data/doc/paper/2112.10913.md 651 | Loong/data/doc/paper/2112.13227.md 652 | Loong/data/doc/paper/2201.00965.md 653 | Loong/data/doc/paper/2201.11990.md 654 | Loong/data/doc/paper/2203.09043.md 655 | Loong/data/doc/paper/2203.15556.md 656 | Loong/data/doc/paper/2204.03775.md 657 | Loong/data/doc/paper/2205.10383.md 658 | Loong/data/doc/paper/2205.11257.md 659 | Loong/data/doc/paper/2206.06290.md 660 | Loong/data/doc/paper/2207.06731.md 661 | Loong/data/doc/paper/2207.11536.md 662 | Loong/data/doc/paper/2208.12615.md 663 | Loong/data/doc/paper/2209.02128.md 664 | Loong/data/doc/paper/2209.08244.md 665 | Loong/data/doc/paper/2209.08648.md 666 | Loong/data/doc/paper/2209.12660.md 667 | Loong/data/doc/paper/2209.12681.md 668 | Loong/data/doc/paper/2209.13768.md 669 | Loong/data/doc/paper/2210.09953.md 670 | Loong/data/doc/paper/2210.10749.md 671 | Loong/data/doc/paper/2210.11735.md 672 | Loong/data/doc/paper/2210.14140.md 673 | Loong/data/doc/paper/2210.15097.md 674 | Loong/data/doc/paper/2210.16724.md 675 | Loong/data/doc/paper/2211.04894.md 676 | Loong/data/doc/paper/2211.05244.md 677 | Loong/data/doc/paper/2211.05617.md 678 | Loong/data/doc/paper/2211.10066.md 679 | Loong/data/doc/paper/2211.10797.md 680 | Loong/data/doc/paper/2212.04356.md 681 | Loong/data/doc/paper/2212.10001.md 682 | Loong/data/doc/paper/2212.13138.md 683 | Loong/data/doc/paper/2301.05062.md 684 | Loong/data/doc/paper/2301.08807.md 685 | Loong/data/doc/paper/2301.10813.md 686 | Loong/data/doc/paper/2301.13196.md 687 | 
Loong/data/doc/paper/2302.05442.md 688 | Loong/data/doc/paper/2302.09751.md 689 | Loong/data/doc/paper/2302.13971.md 690 | Loong/data/doc/paper/2302.14376.md 691 | Loong/data/doc/paper/2303.07678.md 692 | Loong/data/doc/paper/2303.08774.md 693 | Loong/data/doc/paper/2303.08891.md 694 | Loong/data/doc/paper/2303.12712.md 695 | Loong/data/doc/paper/2303.13375.md 696 | Loong/data/doc/paper/2303.13495.md 697 | Loong/data/doc/paper/2303.13496.md 698 | Loong/data/doc/paper/2303.13604.md 699 | Loong/data/doc/paper/2303.15056.md 700 | Loong/data/doc/paper/2303.16129.md 701 | Loong/data/doc/paper/2303.16203.md 702 | Loong/data/doc/paper/2304.03283.md 703 | Loong/data/doc/paper/2304.03442.md 704 | Loong/data/doc/paper/2304.04661.md 705 | Loong/data/doc/paper/2304.06793.md 706 | Loong/data/doc/paper/2304.08177.md 707 | Loong/data/doc/paper/2304.08782.md 708 | Loong/data/doc/paper/2304.12898.md 709 | Loong/data/doc/paper/2304.14108.md 710 | Loong/data/doc/paper/2305.00857.md 711 | Loong/data/doc/paper/2305.01865.md 712 | Loong/data/doc/paper/2305.02536.md 713 | Loong/data/doc/paper/2305.03653.md 714 | Loong/data/doc/paper/2305.06161.md 715 | Loong/data/doc/paper/2305.07004.md 716 | Loong/data/doc/paper/2305.08322.md 717 | Loong/data/doc/paper/2305.12474.md 718 | Loong/data/doc/paper/2305.13691.md 719 | Loong/data/doc/paper/2305.14334.md 720 | Loong/data/doc/paper/2305.14992.md 721 | Loong/data/doc/paper/2305.15011.md 722 | Loong/data/doc/paper/2305.15077.md 723 | Loong/data/doc/paper/2305.15347.md 724 | Loong/data/doc/paper/2305.16291.md 725 | Loong/data/doc/paper/2305.16307.md 726 | Loong/data/doc/paper/2305.16366.md 727 | Loong/data/doc/paper/2305.17560.md 728 | Loong/data/doc/paper/2305.17812.md 729 | Loong/data/doc/paper/2305.18290.md 730 | Loong/data/doc/paper/2306.02707.md 731 | Loong/data/doc/paper/2306.03078.md 732 | Loong/data/doc/paper/2306.05685.md 733 | Loong/data/doc/paper/2306.06624.md 734 | Loong/data/doc/paper/2306.07629.md 735 | 
Loong/data/doc/paper/2306.09198.md 736 | Loong/data/doc/paper/2306.11348.md 737 | Loong/data/doc/paper/2306.11417.md 738 | Loong/data/doc/paper/2306.11644.md 739 | Loong/data/doc/paper/2306.15626.md 740 | Loong/data/doc/paper/2306.16793.md 741 | Loong/data/doc/paper/2306.17653.md 742 | Loong/data/doc/paper/2307.03172.md 743 | Loong/data/doc/paper/2307.03319.md 744 | Loong/data/doc/paper/2307.03875.md 745 | Loong/data/doc/paper/2307.04684.md 746 | Loong/data/doc/paper/2307.06135.md 747 | Loong/data/doc/paper/2307.09288.md 748 | Loong/data/doc/paper/2307.09481.md 749 | Loong/data/doc/paper/2307.10719.md 750 | Loong/data/doc/paper/2307.15217.md 751 | Loong/data/doc/paper/2307.15818.md 752 | Loong/data/doc/paper/2307.16039.md 753 | Loong/data/doc/paper/2307.16645.md 754 | Loong/data/doc/paper/2307.16789.md 755 | Loong/data/doc/paper/2307.16877.md 756 | Loong/data/doc/paper/2308.00352.md 757 | Loong/data/doc/paper/2308.01861.md 758 | Loong/data/doc/paper/2308.05384.md 759 | Loong/data/doc/paper/2308.07107.md 760 | Loong/data/doc/paper/2308.08493.md 761 | Loong/data/doc/paper/2308.08998.md 762 | Loong/data/doc/paper/2308.10960.md 763 | Loong/data/doc/paper/2308.11432.md 764 | Loong/data/doc/paper/2308.12284.md 765 | Loong/data/doc/paper/2308.12950.md 766 | Loong/data/doc/paper/2308.13937.md 767 | Loong/data/doc/paper/2309.00267.md 768 | Loong/data/doc/paper/2309.01219.md 769 | Loong/data/doc/paper/2309.01431.md 770 | Loong/data/doc/paper/2309.03450.md 771 | Loong/data/doc/paper/2309.04662.md 772 | Loong/data/doc/paper/2309.05463.md 773 | Loong/data/doc/paper/2309.05868.md 774 | Loong/data/doc/paper/2309.05922.md 775 | Loong/data/doc/paper/2309.06180.md 776 | Loong/data/doc/paper/2309.07864.md 777 | Loong/data/doc/paper/2309.08532.md 778 | Loong/data/doc/paper/2309.08958.md 779 | Loong/data/doc/paper/2309.09400.md 780 | Loong/data/doc/paper/2309.10313.md 781 | Loong/data/doc/paper/2309.10400.md 782 | Loong/data/doc/paper/2309.11925.md 783 | 
Loong/data/doc/paper/2309.12307.md 784 | Loong/data/doc/paper/2309.12499.md 785 | Loong/data/doc/paper/2309.12871.md 786 | Loong/data/doc/paper/2309.15025.md 787 | Loong/data/doc/paper/2309.15088.md 788 | Loong/data/doc/paper/2309.16039.md 789 | Loong/data/doc/paper/2309.17410.md 790 | Loong/data/doc/paper/2309.17421.md 791 | Loong/data/doc/paper/2309.17425.md 792 | Loong/data/doc/paper/2309.17452.md 793 | Loong/data/doc/paper/2309.17453.md 794 | Loong/data/doc/paper/2310.00564.md 795 | Loong/data/doc/paper/2310.00935.md 796 | Loong/data/doc/paper/2310.01036.md 797 | Loong/data/doc/paper/2310.01152.md 798 | Loong/data/doc/paper/2310.01387.md 799 | Loong/data/doc/paper/2310.01410.md 800 | Loong/data/doc/paper/2310.01798.md 801 | Loong/data/doc/paper/2310.01801.md 802 | Loong/data/doc/paper/2310.02238.md 803 | Loong/data/doc/paper/2310.02304.md 804 | Loong/data/doc/paper/2310.02989.md 805 | Loong/data/doc/paper/2310.03025.md 806 | Loong/data/doc/paper/2310.03051.md 807 | Loong/data/doc/paper/2310.03744.md 808 | Loong/data/doc/paper/2310.04406.md 809 | Loong/data/doc/paper/2310.04875.md 810 | Loong/data/doc/paper/2310.05204.md 811 | Loong/data/doc/paper/2310.05492.md 812 | Loong/data/doc/paper/2310.05915.md 813 | Loong/data/doc/paper/2310.06117.md 814 | Loong/data/doc/paper/2310.06225.md 815 | Loong/data/doc/paper/2310.06770.md 816 | Loong/data/doc/paper/2310.06825.md 817 | Loong/data/doc/paper/2310.06987.md 818 | Loong/data/doc/paper/2310.07075.md 819 | Loong/data/doc/paper/2310.07177.md 820 | Loong/data/doc/paper/2310.07554.md 821 | Loong/data/doc/paper/2310.07579.md 822 | Loong/data/doc/paper/2310.08118.md 823 | Loong/data/doc/paper/2310.08256.md 824 | Loong/data/doc/paper/2310.08319.md 825 | Loong/data/doc/paper/2310.08372.md 826 | Loong/data/doc/paper/2310.08419.md 827 | Loong/data/doc/paper/2310.08461.md 828 | Loong/data/doc/paper/2310.08491.md 829 | Loong/data/doc/paper/2310.08541.md 830 | Loong/data/doc/paper/2310.08879.md 831 | 
Loong/data/doc/paper/2310.09590.md 832 | Loong/data/doc/paper/2310.10047.md 833 | Loong/data/doc/paper/2310.10158.md 834 | Loong/data/doc/paper/2310.10482.md 835 | Loong/data/doc/paper/2310.10501.md 836 | Loong/data/doc/paper/2310.10631.md 837 | Loong/data/doc/paper/2310.10634.md 838 | Loong/data/doc/paper/2310.10638.md 839 | Loong/data/doc/paper/2310.10677.md 840 | Loong/data/doc/paper/2310.11511.md 841 | Loong/data/doc/paper/2310.11689.md 842 | Loong/data/doc/paper/2310.12036.md 843 | Loong/data/doc/paper/2310.12397.md 844 | Loong/data/doc/paper/2310.12426.md 845 | Loong/data/doc/paper/2310.12487.md 846 | Loong/data/doc/paper/2310.12541.md 847 | Loong/data/doc/paper/2310.12560.md 848 | Loong/data/doc/paper/2310.12773.md 849 | Loong/data/doc/paper/2310.12823.md 850 | Loong/data/doc/paper/2310.12931.md 851 | Loong/data/doc/paper/2310.12956.md 852 | Loong/data/doc/paper/2310.13023.md 853 | Loong/data/doc/paper/2310.13227.md 854 | Loong/data/doc/paper/2310.13548.md 855 | Loong/data/doc/paper/2310.13615.md 856 | Loong/data/doc/paper/2310.13639.md 857 | Loong/data/doc/paper/2310.13988.md 858 | Loong/data/doc/paper/2310.14628.md 859 | Loong/data/doc/paper/2310.15123.md 860 | Loong/data/doc/paper/2310.15144.md 861 | Loong/data/doc/paper/2310.15916.md 862 | Loong/data/doc/paper/2310.16450.md 863 | Loong/data/doc/paper/2310.16570.md 864 | Loong/data/doc/paper/2310.16789.md 865 | Loong/data/doc/paper/2310.16795.md 866 | Loong/data/doc/paper/2310.17623.md 867 | Loong/data/doc/paper/2310.17631.md 868 | Loong/data/doc/paper/2310.17976.md 869 | Loong/data/doc/paper/2310.18465.md 870 | Loong/data/doc/paper/2310.18969.md 871 | Loong/data/doc/paper/2310.19046.md 872 | Loong/data/doc/paper/2310.19102.md 873 | Loong/data/doc/paper/2310.19784.md 874 | Loong/data/doc/paper/2310.19852.md 875 | Loong/data/doc/paper/2310.20329.md 876 | Loong/data/doc/paper/2310.20689.md 877 | Loong/data/doc/paper/2310.20707.md 878 | Loong/data/doc/paper/2311.00423.md 879 | 
Loong/data/doc/paper/2311.01192.md 880 | Loong/data/doc/paper/2311.03348.md 881 | Loong/data/doc/paper/2311.04850.md 882 | Loong/data/doc/paper/2311.04897.md 883 | Loong/data/doc/paper/2311.05232.md 884 | Loong/data/doc/paper/2311.05997.md 885 | Loong/data/doc/paper/2311.07463.md 886 | Loong/data/doc/paper/2311.07911.md 887 | Loong/data/doc/paper/2311.08045.md 888 | Loong/data/doc/paper/2311.08252.md 889 | Loong/data/doc/paper/2311.08401.md 890 | Loong/data/doc/paper/2311.08711.md 891 | Loong/data/doc/paper/2311.08803.md 892 | Loong/data/doc/paper/2311.09215.md 893 | Loong/data/doc/paper/2311.09677.md 894 | Loong/data/doc/paper/2311.10081.md 895 | Loong/data/doc/paper/2311.10702.md 896 | Loong/data/doc/paper/2311.11045.md 897 | Loong/data/doc/paper/2311.11797.md 898 | Loong/data/doc/paper/2311.11855.md 899 | Loong/data/doc/paper/2311.12420.md 900 | Loong/data/doc/paper/2311.12983.md 901 | Loong/data/doc/paper/2311.13503.md 902 | Loong/data/doc/paper/2311.13721.md 903 | Loong/data/doc/paper/2311.14115.md 904 | Loong/data/doc/paper/2311.14520.md 905 | Loong/data/doc/paper/2311.15249.md 906 | Loong/data/doc/paper/2311.15451.md 907 | Loong/data/doc/paper/2311.16079.md 908 | Loong/data/doc/paper/2311.16090.md 909 | Loong/data/doc/paper/2311.16169.md 910 | Loong/data/doc/paper/2311.16452.md 911 | Loong/data/doc/paper/2311.16502.md 912 | Loong/data/doc/paper/2311.16867.md 913 | Loong/data/doc/paper/2311.17035.md 914 | Loong/data/doc/paper/2311.17541.md 915 | Loong/data/doc/paper/2311.17946.md 916 | Loong/data/doc/paper/2311.18677.md 917 | Loong/data/doc/paper/2311.18743.md 918 | Loong/data/doc/paper/2311.18760.md 919 | Loong/data/doc/paper/2312.00164.md 920 | Loong/data/doc/paper/2312.01797.md 921 | Loong/data/doc/paper/2312.02418.md 922 | Loong/data/doc/paper/2312.04724.md 923 | Loong/data/doc/paper/2312.04985.md 924 | Loong/data/doc/paper/2312.06585.md 925 | Loong/data/doc/paper/2312.07395.md 926 | Loong/data/doc/paper/2312.07551.md 927 | 
Loong/data/doc/paper/2312.07930.md 928 | Loong/data/doc/paper/2312.08358.md 929 | Loong/data/doc/paper/2312.08914.md 930 | Loong/data/doc/paper/2312.09085.md 931 | Loong/data/doc/paper/2312.09241.md 932 | Loong/data/doc/paper/2312.09244.md 933 | Loong/data/doc/paper/2312.09390.md 934 | Loong/data/doc/paper/2312.10997.md 935 | Loong/data/doc/paper/2312.11444.md 936 | Loong/data/doc/paper/2312.12575.md 937 | Loong/data/doc/paper/2312.12683.md 938 | Loong/data/doc/paper/2312.13771.md 939 | Loong/data/doc/paper/2312.14302.md 940 | Loong/data/doc/paper/2312.15166.md 941 | Loong/data/doc/paper/2312.15791.md 942 | Loong/data/doc/paper/2312.16682.md 943 | Loong/data/doc/paper/2312.17122.md 944 | Loong/data/doc/paper/2312.17173.md 945 | Loong/data/doc/paper/2312.17235.md 946 | Loong/data/doc/paper/2312.17238.md 947 | Loong/data/doc/paper/2401.00071.md 948 | Loong/data/doc/paper/2401.00211.md 949 | Loong/data/doc/paper/2401.00368.md 950 | Loong/data/doc/paper/2401.00595.md 951 | Loong/data/doc/paper/2401.00812.md 952 | Loong/data/doc/paper/2401.01055.md 953 | Loong/data/doc/paper/2401.01141.md 954 | Loong/data/doc/paper/2401.01275.md 955 | Loong/data/doc/paper/2401.01286.md 956 | Loong/data/doc/paper/2401.01325.md 957 | Loong/data/doc/paper/2401.01335.md 958 | Loong/data/doc/paper/2401.01614.md 959 | Loong/data/doc/paper/2401.01854.md 960 | Loong/data/doc/paper/2401.01879.md 961 | Loong/data/doc/paper/2401.02015.md 962 | Loong/data/doc/paper/2401.02051.md 963 | Loong/data/doc/paper/2401.02385.md 964 | Loong/data/doc/paper/2401.02954.md 965 | Loong/data/doc/paper/2401.03462.md 966 | Loong/data/doc/paper/2401.04056.md 967 | Loong/data/doc/paper/2401.04088.md 968 | Loong/data/doc/paper/2401.04398.md 969 | Loong/data/doc/paper/2401.04620.md 970 | Loong/data/doc/paper/2401.04621.md 971 | Loong/data/doc/paper/2401.04695.md 972 | Loong/data/doc/paper/2401.05268.md 973 | Loong/data/doc/paper/2401.05302.md 974 | Loong/data/doc/paper/2401.05561.md 975 | 
Loong/data/doc/paper/2401.05654.md 976 | Loong/data/doc/paper/2401.05778.md 977 | Loong/data/doc/paper/2401.05856.md 978 | Loong/data/doc/paper/2401.06059.md 979 | Loong/data/doc/paper/2401.06066.md 980 | Loong/data/doc/paper/2401.06080.md 981 | Loong/data/doc/paper/2401.06102.md 982 | Loong/data/doc/paper/2401.06118.md 983 | Loong/data/doc/paper/2401.06121.md 984 | Loong/data/doc/paper/2401.06201.md 985 | Loong/data/doc/paper/2401.06209.md 986 | Loong/data/doc/paper/2401.06373.md 987 | Loong/data/doc/paper/2401.06468.md 988 | Loong/data/doc/paper/2401.06760.md 989 | Loong/data/doc/paper/2401.06954.md 990 | Loong/data/doc/paper/2401.07102.md 991 | Loong/data/doc/paper/2401.07184.md 992 | Loong/data/doc/paper/2401.07324.md 993 | Loong/data/doc/paper/2401.07897.md 994 | Loong/data/doc/paper/2401.08281.md 995 | Loong/data/doc/paper/2401.08358.md 996 | Loong/data/doc/paper/2401.08406.md 997 | Loong/data/doc/paper/2401.08417.md 998 | Loong/data/doc/paper/2401.08541.md 999 | Loong/data/doc/paper/2401.09395.md 1000 | Loong/data/doc/paper/2401.09670.md 1001 | Loong/data/doc/paper/2401.10020.md 1002 | Loong/data/doc/paper/2401.10034.md 1003 | Loong/data/doc/paper/2401.10302.md 1004 | Loong/data/doc/paper/2401.10774.md 1005 | Loong/data/doc/paper/2401.10935.md 1006 | Loong/data/doc/paper/2401.11181.md 1007 | Loong/data/doc/paper/2401.11708.md 1008 | Loong/data/doc/paper/2401.11817.md 1009 | Loong/data/doc/paper/2401.11888.md 1010 | Loong/data/doc/paper/2401.12187.md 1011 | Loong/data/doc/paper/2401.12963.md 1012 | Loong/data/doc/paper/2401.13601.md 1013 | Loong/data/doc/paper/2401.13649.md 1014 | Loong/data/doc/paper/2401.13919.md 1015 | Loong/data/doc/paper/2401.14887.md 1016 | Loong/data/doc/paper/2401.15360.md 1017 | Loong/data/doc/paper/2401.16185.md 1018 | Loong/data/doc/paper/2401.17882.md 1019 | Loong/data/doc/paper/2401.18018.md 1020 | Loong/data/doc/paper/2401.18079.md 1021 | Loong/data/doc/paper/2402.00157.md 1022 | Loong/data/doc/paper/2402.00159.md 1023 | 
Loong/data/doc/paper/2402.01030.md 1024 | Loong/data/doc/paper/2402.01145.md 1025 | Loong/data/doc/paper/2402.01306.md 1026 | Loong/data/doc/paper/2402.01622.md 1027 | Loong/data/doc/paper/2402.01739.md 1028 | Loong/data/doc/paper/2402.01817.md 1029 | Loong/data/doc/paper/2402.02057.md 1030 | Loong/data/doc/paper/2402.02416.md 1031 | Loong/data/doc/paper/2402.02716.md 1032 | Loong/data/doc/paper/2402.03620.md 1033 | Loong/data/doc/paper/2402.04247.md 1034 | Loong/data/doc/paper/2402.05861.md 1035 | Loong/data/doc/paper/2402.09178.md 1036 | Loong/data/doc/paper/2402.10104.md 1037 | Loong/data/doc/paper/2402.10705.md 1038 | Loong/data/doc/paper/2402.12348.md 1039 | Loong/data/doc/paper/2402.14328.md 1040 | Loong/data/doc/paper/2402.14658.md 1041 | Loong/data/doc/paper/2402.14830.md 1042 | Loong/data/doc/paper/2402.15205.md 1043 | Loong/data/doc/paper/2402.16627.md 1044 | Loong/data/doc/paper/2402.16981.md 1045 | Loong/data/doc/paper/2402.17563.md 1046 | Loong/data/doc/paper/2402.19113.md 1047 | Loong/data/doc/paper/2403.00278.md 1048 | Loong/data/doc/paper/2403.01876.md 1049 | Loong/data/doc/paper/2403.01979.md 1050 | Loong/data/doc/paper/2403.03101.md 1051 | Loong/data/doc/paper/2403.03230.md 1052 | Loong/data/doc/paper/2403.05530.md 1053 | Loong/data/doc/paper/2403.06560.md 1054 | Loong/data/doc/paper/2403.07059.md 1055 | Loong/data/doc/paper/2403.07559.md 1056 | Loong/data/doc/paper/2403.08140.md 1057 | Loong/data/doc/paper/2403.08251.md 1058 | Loong/data/doc/paper/2403.08337.md 1059 | Loong/data/doc/paper/2403.08570.md 1060 | Loong/data/doc/paper/2403.09308.md 1061 | Loong/data/doc/paper/2403.09746.md 1062 | Loong/data/doc/paper/2403.12806.md 1063 | Loong/data/doc/paper/2403.13309.md 1064 | Loong/data/doc/paper/2403.13355.md 1065 | Loong/data/doc/paper/2403.13438.md 1066 | Loong/data/doc/paper/2403.14123.md 1067 | Loong/data/doc/paper/2403.15556.md 1068 | Loong/data/doc/paper/2403.19154.md 1069 | Loong/data/doc/paper/2403.19949.md 1070 | 
Loong/data/doc/paper/2403.19962.md 1071 | Loong/data/doc/paper/2403.20208.md 1072 | Loong/data/doc/paper/2404.00376.md 1073 | Loong/data/doc/paper/2404.01349.md 1074 | Loong/data/doc/paper/2404.02152.md 1075 | Loong/data/doc/paper/2404.02575.md 1076 | Loong/data/doc/paper/2404.04292.md 1077 | Loong/data/doc/paper/2404.04850.md 1078 | Loong/data/doc/paper/2404.04925.md 1079 | Loong/data/doc/paper/2404.05264.md 1080 | Loong/data/doc/paper/2404.05443.md 1081 | Loong/data/doc/paper/2404.05955.md 1082 | Loong/data/doc/paper/2404.05971.md 1083 | Loong/data/doc/paper/2404.06654.md 1084 | Loong/data/doc/paper/2404.07084.md 1085 | Loong/data/doc/paper/2404.09356.md 1086 | Loong/data/doc/paper/2404.09486.md 1087 | Loong/data/doc/paper/2404.09699.md 1088 | Loong/data/doc/paper/2404.10150.md 1089 | Loong/data/doc/paper/2404.11269.md 1090 | Loong/data/doc/paper/2404.11276.md 1091 | Loong/data/doc/paper/2404.11912.md 1092 | Loong/data/doc/paper/2404.11932.md 1093 | Loong/data/doc/paper/2404.12135.md 1094 | Loong/data/doc/paper/2404.12318.md 1095 | Loong/data/doc/paper/2404.12736.md 1096 | Loong/data/doc/paper/2404.13066.md 1097 | Loong/data/doc/paper/2404.13591.md 1098 | Loong/data/doc/paper/2404.13886.md 1099 | Loong/data/doc/paper/2404.14082.md 1100 | Loong/data/doc/paper/2404.14122.md 1101 | Loong/data/doc/paper/2404.14215.md 1102 | Loong/data/doc/paper/2404.14387.md 1103 | Loong/data/doc/paper/2404.14688.md 1104 | Loong/data/doc/paper/2404.15247.md 1105 | Loong/data/doc/paper/2404.15378.md 1106 | Loong/data/doc/paper/2404.15381.md 1107 | Loong/data/doc/paper/2404.15488.md 1108 | Loong/data/doc/paper/2404.15676.md 1109 | Loong/data/doc/paper/2404.15939.md 1110 | Loong/data/doc/paper/2404.16375.md 1111 | Loong/data/doc/paper/2404.16792.md 1112 | Loong/data/doc/paper/2404.16906.md 1113 | Loong/data/doc/paper/2404.17153.md 1114 | Loong/data/doc/paper/2404.17833.md 1115 | Loong/data/doc/paper/2404.18231.md 1116 | Loong/data/doc/paper/2404.18239.md 1117 | 
Loong/data/doc/paper/2404.18311.md 1118 | Loong/data/doc/paper/2404.18353.md 1119 | Loong/data/doc/paper/2404.18824.md 1120 | Loong/data/doc/paper/2404.18852.md 1121 | Loong/data/doc/paper/2404.18911.md 1122 | Loong/data/doc/paper/2404.19505.md 1123 | Loong/data/doc/paper/2404.19553.md 1124 | Loong/data/doc/paper/2404.19705.md 1125 | Loong/data/doc/paper/2405.00263.md 1126 | Loong/data/doc/paper/2405.01029.md 1127 | Loong/data/doc/paper/2405.01147.md 1128 | Loong/data/doc/paper/2405.01378.md 1129 | Loong/data/doc/paper/2405.01868.md 1130 | Loong/data/doc/paper/2405.02019.md 1131 | Loong/data/doc/paper/2405.02421.md 1132 | Loong/data/doc/paper/2405.02714.md 1133 | Loong/data/doc/paper/2405.03007.md 1134 | Loong/data/doc/paper/2405.03085.md 1135 | Loong/data/doc/paper/2405.03097.md 1136 | Loong/data/doc/paper/2405.03133.md 1137 | Loong/data/doc/paper/2405.03456.md 1138 | Loong/data/doc/paper/2405.03488.md 1139 | Loong/data/doc/paper/2405.03547.md 1140 | Loong/data/doc/paper/2405.03644.md 1141 | Loong/data/doc/paper/2405.03710.md 1142 | Loong/data/doc/paper/2405.03917.md 1143 | Loong/data/doc/paper/2405.04028.md 1144 | Loong/data/doc/paper/2405.04237.md 1145 | Loong/data/doc/paper/2405.04434.md 1146 | Loong/data/doc/paper/2405.04437.md 1147 | Loong/data/doc/paper/2405.04497.md 1148 | Loong/data/doc/paper/2405.04781.md 1149 | Loong/data/doc/paper/2405.05136.md 1150 | Loong/data/doc/paper/2405.05465.md 1151 | Loong/data/doc/paper/2405.05824.md 1152 | Loong/data/doc/paper/2405.05885.md 1153 | Loong/data/doc/paper/2405.05904.md 1154 | Loong/data/doc/paper/2405.05945.md 1155 | Loong/data/doc/paper/2405.05950.md 1156 | Loong/data/doc/paper/2405.05955.md 1157 | Loong/data/doc/paper/2405.06093.md 1158 | Loong/data/doc/paper/2405.06107.md 1159 | Loong/data/doc/paper/2405.06161.md 1160 | Loong/data/doc/paper/2405.06219.md 1161 | Loong/data/doc/paper/2405.06331.md 1162 | Loong/data/doc/paper/2405.06856.md 1163 | Loong/data/doc/paper/2405.07435.md 1164 | 
Loong/data/doc/paper/2405.07437.md 1165 | Loong/data/doc/paper/2405.07460.md 1166 | Loong/data/doc/paper/2405.07482.md 1167 | Loong/data/doc/paper/2405.07518.md 1168 | Loong/data/doc/paper/2405.07628.md 1169 | Loong/data/doc/paper/2405.07657.md 1170 | Loong/data/doc/paper/2405.07745.md 1171 | Loong/data/doc/paper/2405.07761.md 1172 | Loong/data/doc/paper/2405.07764.md 1173 | Loong/data/doc/paper/2405.07770.md 1174 | Loong/data/doc/paper/2405.07898.md 1175 | Loong/data/doc/paper/2405.07960.md 1176 | Loong/data/doc/paper/2405.08054.md 1177 | Loong/data/doc/paper/2405.08135.md 1178 | Loong/data/doc/paper/2405.08289.md 1179 | Loong/data/doc/paper/2405.08480.md 1180 | Loong/data/doc/paper/2405.08514.md 1181 | Loong/data/doc/paper/2405.08542.md 1182 | Loong/data/doc/paper/2405.08555.md 1183 | Loong/data/doc/paper/2405.08582.md 1184 | Loong/data/doc/paper/2405.08745.md 1185 | Loong/data/doc/paper/2405.08748.md 1186 | Loong/data/doc/paper/2405.08813.md 1187 | Loong/data/doc/paper/2405.08839.md 1188 | Loong/data/doc/paper/2405.08863.md 1189 | Loong/data/doc/paper/2405.08888.md 1190 | Loong/data/doc/paper/2405.08908.md 1191 | Loong/data/doc/paper/2405.08920.md 1192 | Loong/data/doc/paper/2405.08981.md 1193 | Loong/data/doc/paper/2405.09115.md 1194 | Loong/data/doc/paper/2405.09117.md 1195 | Loong/data/doc/paper/2405.09142.md 1196 | Loong/data/doc/paper/2405.09204.md 1197 | Loong/data/doc/paper/2405.09251.md 1198 | Loong/data/doc/paper/2405.09255.md 1199 | Loong/data/doc/paper/2405.09266.md 1200 | Loong/data/doc/paper/2405.09276.md 1201 | Loong/data/doc/paper/2405.09285.md 1202 | Loong/data/doc/paper/2405.09306.md 1203 | Loong/data/doc/paper/2405.09330.md 1204 | Loong/data/doc/paper/2405.09341.md 1205 | Loong/data/doc/paper/2405.09395.md 1206 | Loong/data/doc/paper/2405.09711.md 1207 | Loong/data/doc/paper/2405.10150.md 1208 | Loong/data/doc/paper/2405.10480.md 1209 | Loong/data/doc/paper/2405.10516.md 1210 | Loong/data/doc/paper/2405.10632.md 1211 | 
Loong/data/doc/paper/2405.11120.md 1212 | Loong/data/doc/paper/2405.11537.md 1213 | Loong/data/doc/paper/2405.11612.md 1214 | Loong/data/doc/paper/2405.11647.md 1215 | Loong/data/doc/paper/2405.11804.md 1216 | Loong/data/doc/paper/2405.12523.md 1217 | Loong/data/doc/paper/2405.12532.md 1218 | Loong/data/doc/paper/2405.12648.md 1219 | Loong/data/doc/paper/2405.12750.md 1220 | Loong/data/doc/paper/2405.12819.md 1221 | Loong/data/doc/paper/2405.12910.md 1222 | Loong/data/doc/paper/2405.12933.md 1223 | Loong/data/doc/paper/2405.12979.md 1224 | Loong/data/doc/paper/2405.12999.md 1225 | Loong/data/doc/paper/2405.13050.md 1226 | Loong/data/doc/paper/2405.13816.md 1227 | Loong/data/doc/paper/2405.13929.md 1228 | Loong/data/doc/paper/2405.13966.md 1229 | Loong/data/doc/paper/2405.14169.md 1230 | Loong/data/doc/paper/2405.14205.md 1231 | Loong/data/doc/paper/2405.14231.md 1232 | Loong/data/doc/paper/2405.14573.md 1233 | Loong/data/doc/paper/2405.14591.md 1234 | Loong/data/doc/paper/2405.14722.md 1235 | Loong/data/doc/paper/2405.14734.md 1236 | Loong/data/doc/paper/2405.14751.md 1237 | Loong/data/doc/paper/2405.14785.md 1238 | Loong/data/doc/paper/2405.14804.md 1239 | Loong/data/doc/paper/2405.14828.md 1240 | Loong/data/doc/paper/2405.14930.md 1241 | Loong/data/doc/paper/2405.14974.md 1242 | Loong/data/doc/paper/2405.15007.md 1243 | Loong/data/doc/paper/2405.15032.md 1244 | Loong/data/doc/paper/2405.15116.md 1245 | Loong/data/doc/paper/2405.15130.md 1246 | Loong/data/doc/paper/2405.15160.md 1247 | Loong/data/doc/paper/2405.15194.md 1248 | Loong/data/doc/paper/2405.15287.md 1249 | Loong/data/doc/paper/2405.15306.md 1250 | Loong/data/doc/paper/2405.15307.md 1251 | Loong/data/doc/paper/2405.15341.md 1252 | Loong/data/doc/paper/2405.15349.md 1253 | Loong/data/doc/paper/2405.15604.md 1254 | Loong/data/doc/paper/2405.15614.md 1255 | Loong/data/doc/paper/2405.15638.md 1256 | Loong/data/doc/paper/2405.15652.md 1257 | Loong/data/doc/paper/2405.15756.md 1258 | 
Loong/data/doc/paper/2405.15984.md 1259 | Loong/data/doc/paper/2405.16009.md 1260 | Loong/data/doc/paper/2405.16247.md 1261 | Loong/data/doc/paper/2405.16276.md 1262 | Loong/data/doc/paper/2405.16388.md 1263 | Loong/data/doc/paper/2405.16406.md 1264 | Loong/data/doc/paper/2405.16444.md 1265 | Loong/data/doc/paper/2405.16510.md 1266 | Loong/data/doc/paper/2405.16528.md 1267 | Loong/data/doc/paper/2405.16533.md 1268 | Loong/data/doc/paper/2405.16661.md 1269 | Loong/data/doc/paper/2405.16681.md 1270 | Loong/data/doc/paper/2405.16714.md 1271 | Loong/data/doc/paper/2405.16783.md 1272 | Loong/data/doc/paper/2405.16821.md 1273 | Loong/data/doc/paper/2405.16833.md 1274 | Loong/data/doc/paper/2405.16847.md 1275 | Loong/data/doc/paper/2405.16908.md 1276 | Loong/data/doc/paper/2405.17051.md 1277 | Loong/data/doc/paper/2405.17147.md 1278 | Loong/data/doc/paper/2405.17220.md 1279 | Loong/data/doc/paper/2405.17233.md 1280 | Loong/data/doc/paper/2405.17249.md 1281 | Loong/data/doc/paper/2405.17374.md 1282 | Loong/data/doc/paper/2405.17386.md 1283 | Loong/data/doc/paper/2405.17438.md 1284 | Loong/data/doc/paper/2405.17512.md 1285 | Loong/data/doc/paper/2405.17602.md 1286 | Loong/data/doc/paper/2405.17653.md 1287 | Loong/data/doc/paper/2405.17741.md 1288 | Loong/data/doc/paper/2405.17820.md 1289 | Loong/data/doc/paper/2405.17915.md 1290 | Loong/data/doc/paper/2405.17927.md 1291 | Loong/data/doc/paper/2405.17935.md 1292 | Loong/data/doc/paper/2405.17950.md 1293 | Loong/data/doc/paper/2405.17969.md 1294 | Loong/data/doc/paper/2405.18027.md 1295 | Loong/data/doc/paper/2405.18111.md 1296 | Loong/data/doc/paper/2405.18137.md 1297 | Loong/data/doc/paper/2405.18193.md 1298 | Loong/data/doc/paper/2405.18208.md 1299 | Loong/data/doc/paper/2405.18272.md 1300 | Loong/data/doc/paper/2405.18320.md 1301 | Loong/data/doc/paper/2405.18348.md 1302 | Loong/data/doc/paper/2405.18392.md 1303 | Loong/data/doc/paper/2405.18415.md 1304 | Loong/data/doc/paper/2405.18573.md 1305 | 
Loong/data/doc/paper/2405.18628.md 1306 | Loong/data/doc/paper/2405.18718.md 1307 | Loong/data/doc/paper/2405.18886.md 1308 | Loong/data/doc/paper/2405.18922.md 1309 | Loong/data/doc/paper/2405.19010.md 1310 | Loong/data/doc/paper/2405.19086.md 1311 | Loong/data/doc/paper/2405.19103.md 1312 | Loong/data/doc/paper/2405.19107.md 1313 | Loong/data/doc/paper/2405.19226.md 1314 | Loong/data/doc/paper/2405.19261.md 1315 | Loong/data/doc/paper/2405.19262.md 1316 | Loong/data/doc/paper/2405.19313.md 1317 | Loong/data/doc/paper/2405.19316.md 1318 | Loong/data/doc/paper/2405.19323.md 1319 | Loong/data/doc/paper/2405.19327.md 1320 | Loong/data/doc/paper/2405.19425.md 1321 | Loong/data/doc/paper/2405.19524.md 1322 | Loong/data/doc/paper/2405.19534.md 1323 | Loong/data/doc/paper/2405.19550.md 1324 | Loong/data/doc/paper/2405.19563.md 1325 | Loong/data/doc/paper/2405.19616.md 1326 | Loong/data/doc/paper/2405.19668.md 1327 | Loong/data/doc/paper/2405.19715.md 1328 | Loong/data/doc/paper/2405.19716.md 1329 | Loong/data/doc/paper/2405.19806.md 1330 | Loong/data/doc/paper/2405.19846.md 1331 | Loong/data/doc/paper/2405.19888.md 1332 | Loong/data/doc/paper/2405.19973.md 1333 | Loong/data/doc/paper/2405.20092.md 1334 | Loong/data/doc/paper/2405.20099.md 1335 | Loong/data/doc/paper/2405.20175.md 1336 | Loong/data/doc/paper/2405.20215.md 1337 | Loong/data/doc/paper/2405.20216.md 1338 | Loong/data/doc/paper/2405.20304.md 1339 | Loong/data/doc/paper/2405.20314.md 1340 | Loong/data/doc/paper/2405.20347.md 1341 | Loong/data/doc/paper/2405.20362.md 1342 | Loong/data/doc/paper/2405.20404.md 1343 | Loong/data/doc/paper/2405.20512.md 1344 | Loong/data/doc/paper/2405.20625.md 1345 | Loong/data/doc/paper/2405.20703.md 1346 | Loong/data/doc/paper/2405.20773.md 1347 | Loong/data/doc/paper/2405.20774.md 1348 | Loong/data/doc/paper/2405.20778.md 1349 | Loong/data/doc/paper/2405.20830.md 1350 | Loong/data/doc/paper/2405.20947.md 1351 | Loong/data/doc/paper/2405.20974.md 1352 | 
Loong/data/doc/paper/2405.21018.md 1353 | Loong/data/doc/paper/2405.21040.md 1354 | Loong/data/doc/paper/2405.21046.md 1355 | Loong/output/qwen/loong_evaluate.jsonl 1356 | Loong/output/qwen/loong_generate.jsonl 1357 | -------------------------------------------------------------------------------- /Loong/LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2023 Alibaba Cloud 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /Loong/README.md: -------------------------------------------------------------------------------- 1 | please download data from https://drive.google.com/file/d/1WnoiR0pADg_DEvfrPUe7bhzjnqAbahBH/view?usp=sharing 2 | 3 | input datas are in Loong/data/loong_process.jsonl 4 | output datas are in Loong/output/qwen 5 | 6 | ```cd src && bash run.sh``` can get results as in the paper -------------------------------------------------------------------------------- /Loong/requirements.txt: -------------------------------------------------------------------------------- 1 | fastapi==0.111.0 2 | numpy==2.0.0 3 | openai==1.35.7 4 | pandas==2.2.2 5 | pydantic==2.8.0 6 | PyYAML==6.0.1 7 | Requests==2.32.3 8 | sentencepiece==0.2.0 9 | safetensors==0.4.3 10 | sse_starlette==2.1.2 11 | tiktoken==0.7.0 12 | torch==2.3.0 13 | tqdm==4.66.4 14 | transformers==4.41.2 15 | uvicorn==0.30.1 16 | vllm==0.5.0.post1 17 | xformers==0.0.26.post1 18 | vllm-flash-attn==2.5.9 19 | anthropic==0.30.1 20 | google-generativeai==0.7.1 
-------------------------------------------------------------------------------- /Loong/src/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ARGS=() 4 | # MODEL 5 | MODEL="qwen" 6 | MODEL_CONFIG="$MODEL.yaml" 7 | EVAL_MODEL_CONFIG="gpt4o.yaml" 8 | # INPUT PATH 9 | DOC_PATH="../data/doc" 10 | INPUT_PATH="../data/loong.jsonl" 11 | MODEL_CONFIG_DIR="../config/models" 12 | # OUTPUT PATH 13 | OUTPUT_PROCESS_PATH="../data/loong_process.jsonl" 14 | OUTPUT_PATH="../output/$MODEL/loong_generate.jsonl" 15 | OUTPUT_EVALUATE_PATH="../output/$MODEL/loong_evaluate.jsonl" 16 | # ARGUMENTS 17 | MAX_LENGTH="128000" # According to the context window of llm. The value of config takes precedence 18 | PROCESS_NUM_GEN="3" # Concurrency number of model generate 19 | PROCESS_NUM_EVAL="20" # Concurrency number of model eval 20 | DEBUG_NUM="-1" # -1 means all data 21 | 22 | while [[ $# -gt 0 ]]; do 23 | case "$1" in 24 | --model) 25 | shift 26 | MODEL="$1" 27 | MODEL_CONFIG="$MODEL.yaml" 28 | OUTPUT_PATH="../output/$MODEL/loong_generate.jsonl" 29 | OUTPUT_EVALUATE_PATH="../output/$MODEL/loong_evaluate.jsonl" 30 | ;; 31 | --continue_gen) 32 | ARGS+="--continue_gen" 33 | ;; 34 | *) 35 | echo "unknown parameter: $1" 36 | exit 1 37 | ;; 38 | esac 39 | shift 40 | done 41 | echo "MODEL=[$MODEL], MODEL_CONFIG=[$MODEL_CONFIG]" 42 | 43 | ARGS+=( 44 | "--models" "$MODEL_CONFIG" 45 | "--eval_model" "$EVAL_MODEL_CONFIG" 46 | "--debug_num" "$DEBUG_NUM" 47 | "--doc_path" "$DOC_PATH" 48 | "--input_path" "$INPUT_PATH" 49 | "--output_process_path" "$OUTPUT_PROCESS_PATH" 50 | "--output_path" "$OUTPUT_PATH" 51 | "--evaluate_output_path" "$OUTPUT_EVALUATE_PATH" 52 | "--max_length" "$MAX_LENGTH" 53 | "--model_config_dir" "$MODEL_CONFIG_DIR" 54 | "--process_num_gen" "$PROCESS_NUM_GEN" 55 | "--process_num_eval" "$PROCESS_NUM_EVAL" 56 | ) 57 | 58 | # Execute in order 59 | # python step1_load_data.py "${ARGS[@]}" 60 | # python step2_model_generate.py 
"${ARGS[@]}" 61 | python step3_model_evaluate.py "${ARGS[@]}" 62 | python step4_cal_metric.py "${ARGS[@]}" 63 | -------------------------------------------------------------------------------- /Loong/src/step1_load_data.py: -------------------------------------------------------------------------------- 1 | import json 2 | import random 3 | import os 4 | from utils.args import parse_arguments 5 | from utils.prompt import get_generate_prompts 6 | from utils.util import count_lines, logger 7 | 8 | 9 | if __name__ == '__main__': 10 | args = parse_arguments() 11 | random.seed(args.seed) 12 | logger.debug(f"args: {args}") 13 | ## step1 14 | if not os.path.exists(args.output_process_path) or (args.debug_num > 0 and count_lines(args.output_process_path) != args.debug_num) or (args.debug_num < 0 and count_lines(args.output_process_path) != count_lines(args.input_path)): 15 | generate_prompts = get_generate_prompts(args) 16 | 17 | with open(args.output_process_path, 'w') as f: 18 | for p in generate_prompts: 19 | f.write(json.dumps(p, ensure_ascii=False, separators=(',', ':')) + "\n") 20 | else: 21 | logger.debug(f"Path exist: {args.output_process_path}") 22 | -------------------------------------------------------------------------------- /Loong/src/step2_model_generate.py: -------------------------------------------------------------------------------- 1 | import json 2 | import random 3 | import os 4 | from utils.args import parse_arguments 5 | from utils.config import load 6 | from utils.generate import generate 7 | from utils.util import create_path, continue_gen, logger 8 | 9 | 10 | if __name__ == '__main__': 11 | args = parse_arguments() 12 | random.seed(args.seed) 13 | 14 | config = load(open(f"{args.model_config_dir}/{args.models}")) 15 | # The value of config takes precedence 16 | if config.get('run_args', {}).get('max_length', None): 17 | args.max_length = config.get('run_args', {}).get('max_length', None) 18 | logger.debug(f"config value: 
max_length={args.max_length} takes precedences") 19 | 20 | tag = "generate_response" 21 | 22 | with open(args.output_process_path, "r") as f: 23 | generate_data = [json.loads(item.strip()) for item in f.readlines()] 24 | 25 | if not os.path.exists(args.output_path): 26 | create_path(args.output_path) 27 | # api 28 | generate(generate_data, config, args.output_path, args.process_num_gen, tag=tag) 29 | else: 30 | 31 | if args.continue_gen: 32 | continue_generate_data = continue_gen(args.output_path, generate_data, tag=tag) 33 | # api 34 | generate(continue_generate_data, config, args.output_path, args.process_num_gen, tag=tag) 35 | else: 36 | logger.debug(f"Path exist: {args.output_path}") 37 | -------------------------------------------------------------------------------- /Loong/src/step3_model_evaluate.py: -------------------------------------------------------------------------------- 1 | import os 2 | from utils.args import parse_arguments 3 | from utils.prompt import get_evaluate_prompts 4 | from utils.generate import generate 5 | from utils.util import create_path, continue_gen, logger 6 | from utils.config import load 7 | 8 | 9 | if __name__ == '__main__': 10 | args = parse_arguments() 11 | 12 | eval_config = load(open(f"{args.model_config_dir}/{args.eval_model}")) 13 | evaluate_prompts = get_evaluate_prompts(args, tag="generate_response") 14 | tag = "eval_response" 15 | 16 | if not os.path.exists(args.evaluate_output_path): 17 | create_path(args.evaluate_output_path) 18 | generate(evaluate_prompts, eval_config, args.evaluate_output_path, args.process_num_eval, tag=tag) 19 | else: 20 | if args.continue_gen: 21 | continue_evaluate_prompts = continue_gen(args.evaluate_output_path, evaluate_prompts, tag=tag) 22 | generate(continue_evaluate_prompts, eval_config, args.evaluate_output_path, args.process_num_eval, 23 | tag=tag) 24 | else: 25 | logger.debug(f"Path exist: {args.evaluate_output_path}") 26 | 
-------------------------------------------------------------------------------- /Loong/src/step4_cal_metric.py: -------------------------------------------------------------------------------- 1 | from utils.args import parse_arguments 2 | from utils.metric import cal_metric 3 | 4 | 5 | if __name__ == '__main__': 6 | args = parse_arguments() 7 | 8 | print("------------------ All metrics: ------------------") 9 | cal_metric(args, tag="eval_response") 10 | print("") 11 | 12 | print(f"------------------ Level metrics: ------------------") 13 | for level in [1, 2, 3, 4]: 14 | print(f"------------------ Level {level} metrics: ------------------") 15 | cal_metric(args, tag="eval_response", level=level) 16 | print("") 17 | 18 | print(f"------------------ Set metrics: ------------------") 19 | for set in [1, 2, 3, 4]: 20 | print(f"------------------ Set {set} metrics ------------------") 21 | for level in [1, 2, 3, 4]: 22 | cal_metric(args, tag="eval_response", set=set, level=level) 23 | cal_metric(args, tag="eval_response", set=set, level=None) 24 | print("") 25 | -------------------------------------------------------------------------------- /Loong/src/test.sh: -------------------------------------------------------------------------------- 1 | 2 | MODEL_CONFIG="1" 3 | ARGS=( 4 | "--models" "$MODEL_CONFIG" 5 | "--eval_model" "$MODEL_CONFIG" 6 | "--debug_num" "$MODEL_CONFIG" 7 | "--doc_path" "$MODEL_CONFIG" 8 | "--input_path" "$MODEL_CONFIG" 9 | "--output_process_path" "$MODEL_CONFIG" 10 | "--output_path" "$MODEL_CONFIG" 11 | "--evaluate_output_path" "$MODEL_CONFIG" 12 | "--max_length" "$MODEL_CONFIG" 13 | "--model_config_dir" "$MODEL_CONFIG" 14 | "--process_num_gen" "$MODEL_CONFIG" 15 | "--process_num_eval" "$MODEL_CONFIG" 16 | "--rag" 17 | "--tmp" "$MODEL_CONFIG" 18 | ) 19 | 20 | # Check whether the incoming parameters contain --continue_gen 21 | for param in "$@"; do 22 | if [ "$param" == "--continue_gen" ]; then 23 | ARGS+=("--continue_gen") 24 | fi 25 | done 26 | 
27 | echo "${ARGS[@]}" -------------------------------------------------------------------------------- /Loong/src/utils/args.py: -------------------------------------------------------------------------------- 1 | #!/user/bin/env python 2 | # coding=utf-8 3 | import argparse 4 | 5 | 6 | def parse_arguments(): 7 | parser = argparse.ArgumentParser(description='args for evaluate.py') 8 | parser.add_argument("--models", "-c", default="gpt4o.yaml") 9 | parser.add_argument("--eval_model", type=str, default="gpt4.yaml") 10 | parser.add_argument('--debug_num', type=int, default=15, help="Control the number of generated items. If <0, it means using all data") 11 | parser.add_argument('--shuffle_prompts', action="store_true") 12 | parser.add_argument('--debug_level', type=str, default="1,2,3,4", help="Represents the level to be evaluated, eg: 1,2 or 3") 13 | parser.add_argument('--debug_set', type=str, default="1,2,3,4", help="Represents the set level to be evaluated, eg: 1,2 or 3") 14 | parser.add_argument('--process_num_gen', type=int, default=10) 15 | parser.add_argument('--process_num_eval', type=int, default=10) 16 | parser.add_argument('--seed', type=int, default=1000000007) 17 | parser.add_argument('--ratio', type=float, default=1) 18 | parser.add_argument('--doc_path', type=str, default='./doc') 19 | parser.add_argument('--input_path', type=str, default='../data/loong.jsonl') 20 | parser.add_argument('--output_process_path', type=str, default='../data/loong_process.jsonl') 21 | parser.add_argument('--output_path', type=str, default='../output/loong_generate.jsonl') 22 | parser.add_argument('--evaluate_output_path', type=str, default='../output/loong_evaluate.jsonl') 23 | parser.add_argument('--max_length', type=int, default=300000) 24 | parser.add_argument('--domain', type=str, default='', help='financial, paper, legal') 25 | parser.add_argument('--add_noise', action="store_true", help="A boolean flag that defaults to False") 26 | parser.add_argument('--rag', 
action="store_true", help="whether to use rag model") 27 | parser.add_argument('--rag_num', type=int, help="recall top n") 28 | parser.add_argument('--continue_gen', action="store_true", help="whether to continue_generate from exist file") 29 | parser.add_argument('--model_config_dir', type=str, default='../config/models') 30 | 31 | args = parser.parse_args() 32 | return args 33 | 34 | -------------------------------------------------------------------------------- /Loong/src/utils/config.py: -------------------------------------------------------------------------------- 1 | #!/user/bin/env python 2 | # coding=utf-8 3 | ''' 4 | @project : loong 5 | @author : fucheng 6 | #@file : config.py 7 | #@ide : PyCharm 8 | #@time : 2024-06-02 13:39:36 9 | ''' 10 | import functools 11 | import os 12 | from typing import Any, Dict 13 | 14 | import yaml 15 | 16 | class ExtLoaderMeta(type): 17 | def __new__(metacls: Any, __name__: str, __bases__: Any, __dict__: Dict) -> Any: 18 | """Add include constructer to class.""" 19 | 20 | # register the include constructor on the class 21 | cls = super().__new__(metacls, __name__, __bases__, __dict__) 22 | cls.add_constructor("!include", cls.construct_include) 23 | 24 | return cls 25 | 26 | 27 | class ExtLoader(yaml.Loader, metaclass=ExtLoaderMeta): 28 | """YAML Loader with `!include` constructor.""" 29 | 30 | def __init__(self, stream: Any) -> None: 31 | """Initialise Loader.""" 32 | 33 | try: 34 | self._root = os.path.split(stream.name)[0] 35 | except AttributeError: 36 | self._root = os.path.curdir 37 | 38 | super().__init__(stream) 39 | 40 | def construct_include(self, node: Any) -> str: 41 | """Include file referenced at node.""" 42 | 43 | filename = os.path.abspath( 44 | os.path.join(self._root, str(self.construct_scalar(node))) 45 | ) 46 | extension = os.path.splitext(filename)[1].lstrip(".") 47 | 48 | with open(filename, "r") as f: 49 | if extension in ("yaml", "yml"): 50 | return yaml.load(f, ExtLoader) 51 | else: 52 | return 
"".join(f.readlines()) 53 | 54 | 55 | # Set MyLoader as default. 56 | load = functools.partial(yaml.load, Loader=ExtLoader) 57 | -------------------------------------------------------------------------------- /Loong/src/utils/generate.py: -------------------------------------------------------------------------------- 1 | import json 2 | from tqdm import tqdm 3 | import multiprocessing 4 | import requests 5 | import numpy as np 6 | from functools import partial 7 | from decimal import Decimal 8 | import numpy as np 9 | import time 10 | from openai import OpenAI 11 | from anthropic import Anthropic 12 | # import google.generativeai as genai 13 | 14 | class MyEncoder(json.JSONEncoder): 15 | def default(self, obj): 16 | if isinstance(obj, np.ndarray): 17 | return obj.tolist() 18 | elif isinstance(obj, bytes): 19 | try: 20 | return str(obj, encoding='utf-8') 21 | except: 22 | return str(obj, encoding='gbk') 23 | elif isinstance(obj, Decimal): 24 | return float(obj) 25 | # print(obj, type(obj)) 26 | return json.JSONEncoder.default(self, obj) 27 | 28 | 29 | def get_api_results(prompt_input, config): 30 | prompt = prompt_input['prompt'] 31 | 32 | if config['type'] == 'openai' or config['type'] == 'vllm': 33 | # client = OpenAI(api_key=config['args']['api_key'], 34 | # base_url=config['args']['api_url'] if config['args']['api_url']!='' else None) 35 | # try: 36 | # response = client.chat.completions.create( 37 | # messages=[{"role": "user","content": prompt}], 38 | # model=config['args']['api_name'], 39 | # temperature=config['run_args']['temperature'] 40 | # ) 41 | # return response.choices[0].message.content 42 | try: 43 | url = "http://47.88.8.18:8088/api/ask" 44 | headers = { 45 | "Content-Type": "application/json", 46 | "Authorization": "Bearer eyJ0eXAiOiJqd3QiLCJhbGciOiJIUzI1NiJ9.eyJ1c2VybmFtZSI6IjIzNzgzNiIsInBhc3N3b3JkIjoiMjM3ODM2MTIzIiwiZXhwIjoyMDMxMzc2MjA0fQ.Lz6IKLMUTWWT5isamrYTmbAcGNFpAqt87YFF2bynP3w" 47 | } 48 | raw_info = { 49 | "model": 
config['args']['api_name'], 50 | "messages": [{"role": "user", "content": prompt}], 51 | "temperature": config['run_args']['temperature'], 52 | } 53 | callback = requests.post(url, data=json.dumps(raw_info), headers=headers, timeout=(10000, 10000)) 54 | # print(callback) 55 | result = callback.json() 56 | # uid = "_".join(result['uid'].split('-')) 57 | # usage = result['data']['response']['usage'] 58 | # json.dump({'uid': uid, 'usage': usage}, open(f'/root/multiagent_doc2graph/Loong/token_num_statics/{uid}.json', 'w')) 59 | return result['data']['response']['choices'][0]['message']['content'] 60 | except Exception as e: 61 | print(e) 62 | return [] 63 | 64 | elif config['type'] == 'gemini': 65 | genai.configure(api_key=config['args']['api_key']) 66 | 67 | model = genai.GenerativeModel(name=config['args']['api_name']) 68 | try: 69 | response = model.generate_content(prompt, 70 | generation_config=genai.types.GenerationConfig( 71 | temperature=config['run_args']['temperature'])) 72 | return response.text 73 | except Exception as e: 74 | print(e) 75 | return [] 76 | 77 | elif config['type'] == 'claude': 78 | client = Anthropic(api_key=config['args']['api_key']) 79 | try: 80 | message = client.messages.create( 81 | messages=[{"role": "user", "content": prompt,}], 82 | model=config['args']['api_name'], 83 | ) 84 | return message.content 85 | except Exception as e: 86 | print(e) 87 | return [] 88 | 89 | elif config['type'] == 'http': 90 | headers = {"Content-Type": "application/json", 91 | "Authorization": config['args']['api_key']} 92 | raw_info = { 93 | "model": config['args']['api_name'], 94 | "messages": [{"role": "user", "content": prompt}], 95 | "n": 1} 96 | raw_info.update(config['run_args']) 97 | try: 98 | callback = requests.post(config['args']['api_url'], data=json.dumps(raw_info, cls=MyEncoder), headers=headers, 99 | timeout=(600, 600)) 100 | result = callback.json() 101 | # todo: customize the result 102 | return 
result['data']['response']['choices'][0]['message']['content'] 103 | except Exception as e: 104 | print(e) 105 | return [] 106 | 107 | else: 108 | raise f"type of {config['type']} is not valid" 109 | 110 | def fetch_api_result(prompt_input, config, max_retries=5): 111 | """Attempt to get a valid result from the API, with a maximum number of retries.""" 112 | for _ in range(max_retries): 113 | result = get_api_results(prompt_input, config) 114 | if result: 115 | return result 116 | # Sleep briefly to not hammer the API in case of errors or rate limits 117 | time.sleep(5) # Uncomment if needed 118 | return None 119 | 120 | 121 | def api(prompt, output_path, config, tag): 122 | response_content = fetch_api_result(prompt, config) 123 | result = prompt.copy() 124 | result[tag] = response_content or "" 125 | with open(output_path, 'a', encoding='utf-8') as fw: 126 | fw.write(json.dumps(result, ensure_ascii=False) + '\n') 127 | 128 | 129 | def generate(prompts, config, output_path, process_num, tag): 130 | func = partial(api, output_path=output_path, config=config, tag=tag) 131 | with multiprocessing.Pool(processes=process_num) as pool: 132 | for _ in tqdm(pool.imap(func, prompts), total=len(prompts)): 133 | pass -------------------------------------------------------------------------------- /Loong/src/utils/metric.py: -------------------------------------------------------------------------------- 1 | import re, json 2 | import numpy as np 3 | 4 | 5 | def extract_number(text): 6 | match = re.search(r'\[\[([0-9]*\.?[0-9]+)\]\]', text) 7 | if match: 8 | return float(match.group(1)) 9 | match = re.search(r'\[([0-9]*\.?[0-9]+)\]', text) 10 | if match: 11 | return float(match.group(1)) 12 | return None 13 | 14 | 15 | def failure_prompts(args, tag): 16 | eval_lines = open(args.old_evaluate_output_path).readlines() 17 | gen_lines = open(args.old_output_path).readlines() 18 | scores = [] 19 | effective_samples = [] 20 | no_effective_samples = [] 21 | for line in eval_lines: 22 
| line = json.loads(line.strip()) 23 | if not extract_number(line[tag]) or line['generate_response'] == "": 24 | no_effective_samples.append(line['id']) 25 | for line in gen_lines: 26 | line = json.loads(line.strip()) 27 | if line['id'] in no_effective_samples: 28 | effective_samples.append( 29 | {'id': line['id'], 'prompt': line['prompt'], 'question': line['question'], 'answer': line['answer']}) 30 | return effective_samples 31 | 32 | 33 | def cal_metric(args, tag, level=None, set=None): 34 | lines = open(args.evaluate_output_path).readlines() 35 | scores = [] 36 | effective_samples = [] 37 | no_effective_samples = [] 38 | for line in lines: 39 | line = json.loads(line.strip()) 40 | 41 | _level = line.get("level", None) 42 | _set = line.get("set", None) 43 | if level and _level and _level != level: 44 | continue 45 | if set and _set and _set != set: 46 | continue 47 | 48 | if extract_number(line[tag]) is not None: 49 | scores.append(extract_number(line[tag])) 50 | effective_samples.append(line) 51 | else: 52 | no_effective_samples.append(line['id']) 53 | 54 | num_full_marks = sum(1 for x in scores if x == 100) 55 | try: 56 | metric = (len(effective_samples) / len(lines), np.mean(scores), f"{num_full_marks}/{len(effective_samples)}", num_full_marks / len(effective_samples)) 57 | 58 | print(f"level: {level}, set: {set}, scoring_success_rate: {metric[0]:.2f} , avg_score: {metric[1]:.2f} , perfect_rate_calculation: {metric[2]} , perfect_rate: {metric[3]:.2f}") 59 | return metric 60 | 61 | except: 62 | print(f"level: {level}, set: {set}, scoring_success_rate:0.00, avg_score:0.00, perfect_rate_calculation:0/0, perfect_rate:0.00") 63 | return None -------------------------------------------------------------------------------- /Loong/src/utils/prompt.py: -------------------------------------------------------------------------------- 1 | import json 2 | from tqdm import tqdm 3 | import random 4 | import uuid 5 | from pathlib import Path 6 | import glob 7 | from 
.token_length import token_length 8 | import io 9 | 10 | 11 | file_handle_cache = {} 12 | 13 | def close_cached_files(): 14 | for file, handle in file_handle_cache.items(): 15 | if isinstance(handle, io.IOBase): 16 | handle.close() 17 | file_handle_cache.clear() 18 | 19 | 20 | def get_content(args, item, doc_name, idx): 21 | global file_handle_cache 22 | doc_type, doc_level = item['type'], item['level'] 23 | docPath = Path(args.doc_path) / doc_type 24 | 25 | if doc_type == 'financial': 26 | if str(doc_level).strip() != '4': 27 | _file = glob.glob(f"{docPath}/*2024-{doc_name}*.txt")[0] 28 | else: 29 | _file = glob.glob(f"{docPath}/*{doc_name}*.txt")[0] 30 | try: 31 | with open(_file, 'r') as txt_file: 32 | _doc_name = Path(_file).stem.split('-')[-1] 33 | # doc = f"<标题起始符>《{_doc_name}》<标题终止符>\n" + txt_file.read() + "\n\n" 34 | doc = f"《{_doc_name}》\n" + txt_file.read() + "\n\n" 35 | except IOError: 36 | print(f"Error: File {_file} could not be opened.") 37 | 38 | elif doc_type == 'paper': 39 | path = docPath / doc_name 40 | try: 41 | with open(path, 'r') as txt_file: 42 | content = txt_file.read() 43 | doc_name = content.split('\n', 1)[0].strip("#").strip() 44 | # doc = f"<标题起始符>{doc_name}<标题终止符>\n" + content + "\n\n" 45 | doc = f"{doc_name}\n" + content + "\n\n" 46 | except IOError: 47 | print(f"Error: File {path} could not be opened.") 48 | 49 | elif doc_type == 'legal': 50 | _file = docPath / "legal.json" 51 | if _file in file_handle_cache: 52 | legal_js = file_handle_cache[_file] 53 | # txt_file.seek(0) 54 | else: 55 | with open(_file, 'r') as txt_file: 56 | legal_js = json.load(txt_file) 57 | file_handle_cache[_file] = legal_js 58 | 59 | if doc_level == 4 and ('阅读以上判决文书,我将给你若干份判决结果:' in item['instruction']): 60 | content = legal_js[doc_name]["content"] 61 | else: 62 | content = legal_js[doc_name]["content"] + legal_js[doc_name]["result"] 63 | # doc = f"<标题起始符>《判决文书{idx + 1}》<标题终止符>\n" + content + "\n\n" 64 | doc = f"《判决文书{idx + 1}》\n" + content + "\n\n" 65 | 66 
| else: 67 | raise "doc_type not valid!" 68 | 69 | return doc 70 | 71 | 72 | def get_contents(args, item, doc_names): 73 | contents = [] 74 | for idx, doc_name in enumerate(doc_names): 75 | content = get_content(args, item, doc_name, idx) 76 | contents.append(content) 77 | return contents 78 | 79 | 80 | def get_doc_str(args, item, prompt_template): 81 | len_prompt_template = token_length(prompt_template) - token_length("{docs}") 82 | is_shuffle = item.get("shuffle_doc", True) 83 | 84 | docs = item['doc'] if not args.rag else item["recall_chunks"][:args.rag_num] 85 | docs_list = [] 86 | 87 | if args.rag: 88 | for doc in docs: 89 | if len_prompt_template + sum(token_length(s) for s in docs_list) + token_length(doc) > args.max_length: 90 | continue 91 | docs_list.append(doc) 92 | else: 93 | # read content from given doc names 94 | contents = get_contents(args, item, docs) 95 | # shuffle 96 | if is_shuffle and item['type'] == 'financial': 97 | random.shuffle(contents) 98 | for content in contents: 99 | if len_prompt_template + sum(token_length(s) for s in docs_list) + token_length(content) > args.max_length: 100 | continue 101 | docs_list.append(content) 102 | 103 | # shuffle 104 | if is_shuffle: 105 | random.shuffle(docs_list) 106 | docs_str = "".join(docs_list) 107 | return docs_str 108 | 109 | 110 | def get_generate_prompt(args, item): 111 | replace_dict = {"{question}": item['question'], "{instruction}": item['instruction']} 112 | prompt_template = item['prompt_template'] 113 | for k, v in replace_dict.items(): 114 | prompt_template = prompt_template.replace(k, v) 115 | doc_str = get_doc_str(args, item, prompt_template) 116 | prompt_template = prompt_template.replace("{docs}", doc_str) 117 | item['docs'] = doc_str 118 | item['prompt'] = prompt_template 119 | return item 120 | 121 | 122 | def get_generate_prompts(args): 123 | prompts = [] 124 | with open(args.input_path, 'r') as file: 125 | lines = file.readlines() 126 | 127 | if args.shuffle_prompts: 128 | 
random.shuffle(lines) 129 | # debug num samples 130 | if args.debug_num and args.debug_num > 0: 131 | lines = lines[:args.debug_num] 132 | if args.ratio != 1: 133 | random.shuffle(lines) 134 | lines = lines[int(len(prompts) * args.ratio):] 135 | 136 | for line in tqdm(lines, desc="gen_prompts"): 137 | item = json.loads(line) 138 | doc_type, set_level, level = item['type'], item['set'], item['level'] 139 | # filter 140 | if args.domain.strip(): 141 | domains = args.domain.strip().split(",") 142 | domains = list(map(lambda x: x.strip(), domains)) 143 | if doc_type not in domains: 144 | continue 145 | if args.debug_set.strip(): 146 | sets = args.debug_set.strip().split(",") 147 | sets = list(map(int, sets)) 148 | if set_level not in sets: 149 | continue 150 | if args.debug_level.strip(): 151 | levels = args.debug_level.strip().split(",") 152 | levels = list(map(int, levels)) 153 | if level not in levels: 154 | continue 155 | 156 | prompt = get_generate_prompt(args, item) 157 | prompts.append(prompt) 158 | close_cached_files() 159 | return prompts 160 | 161 | 162 | def get_evaluate_prompts(args, tag): 163 | prompt = '''[Question] 164 | {} 165 | 166 | [Gold Answer] 167 | {} 168 | 169 | [The Start of Assistant's Predicted Answer] 170 | {} 171 | [The End of Assistant's Predicted Answer] 172 | 173 | [System] 174 | We would like to request your feedback on the performance of the AI assistant in response to the user question displayed above according to the gold answer. Please use the following listed aspects and their descriptions as evaluation criteria: 175 | - Accuracy and Hallucinations: The assistant's answer is semantically consistent with the gold answer; The numerical value and order need to be accurate, and there should be no hallucinations. 176 | - Completeness: Referring to the reference answers, the assistant's answer should contain all the key points needed to answer the user's question; further elaboration on these key points can be omitted. 
177 | Please rate whether this answer is suitable for the question. Please note that the gold answer can be considered as a correct answer to the question. 178 | 179 | The assistant receives an overall score on a scale of 1 to 100, where a higher score indicates better overall performance. 180 | Please note that if the assistant's answer and the gold answer fully meet the above criteria, its overall rating should be the full marks (100). 181 | Please first provide a comprehensive explanation of your evaluation, avoiding any potential bias. 182 | Then, output a line indicating the score of the Assistant. 183 | 184 | PLEASE OUTPUT WITH THE FOLLOWING FORMAT, WHERE THE SCORE IS A SCALE OF 1 TO 100 BY STRICTLY FOLLOWING THIS FORMAT: "[[score]]", FOR EXAMPLE "Rating: [[100]]": 185 | 186 | Evaluation evidence: your evluation explanation here, no more than 100 words 187 | Rating: [[score]] 188 | 189 | 190 | Now, start your evaluation:''' 191 | prompts = [] 192 | lines = open(args.output_path).readlines() 193 | for line in lines: 194 | line = json.loads(line.strip()) 195 | line.pop('docs', '') 196 | doc_type, question, instruction = line['type'], line['question'], line['instruction'] 197 | prompt_template = line['prompt_template'] 198 | if doc_type != "paper": 199 | prompt_template = prompt_template.replace("{docs}", "") 200 | question = prompt_template.replace("{question}", question).replace("{instruction}", instruction) 201 | answer = line['answer'] 202 | predict = line[tag] 203 | line['prompt'] = prompt.format(question, answer, predict) 204 | prompts.append(line) 205 | return prompts 206 | -------------------------------------------------------------------------------- /Loong/src/utils/token_length.py: -------------------------------------------------------------------------------- 1 | import tiktoken 2 | encoding = tiktoken.encoding_for_model("gpt-3.5-turbo") 3 | def token_length(text): 4 | return len(encoding.encode(text, disallowed_special=())) 5 | 6 | if __name__ == 
"__main__": 7 | res = token_length("{docs}") 8 | print(res) -------------------------------------------------------------------------------- /Loong/src/utils/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import logging 4 | from colorlog import ColoredFormatter 5 | 6 | 7 | def count_lines(file_path): 8 | with open(file_path, 'r', encoding='utf-8') as file: 9 | return sum(1 for _ in file) 10 | 11 | 12 | def create_path(file_path): 13 | directory = os.path.dirname(file_path) 14 | if not os.path.exists(directory): 15 | os.makedirs(directory) 16 | 17 | 18 | def continue_gen(input_path, gen_data, tag): 19 | seen_id = dict() 20 | with open(input_path, 'r') as f: 21 | for item in f.readlines(): 22 | js = json.loads(item.strip()) 23 | if js[tag]: 24 | seen_id[js['id']] = js 25 | rewrite_data, continue_generate_data = [], [] 26 | seen_rewrite = set() 27 | for item in gen_data: 28 | _id = item['id'] 29 | if _id in seen_rewrite: 30 | continue 31 | if _id not in seen_id: 32 | continue_generate_data.append(item) 33 | else: 34 | rewrite_data.append(seen_id[_id]) 35 | # dedup 36 | seen_rewrite.add(_id) 37 | with open(input_path, 'w') as f: 38 | for item in rewrite_data: 39 | f.write(json.dumps(item, ensure_ascii=False) + '\n') 40 | print(f"continue_gen: input_path={input_path}, rewrite_data_num={len(rewrite_data)}, tag={tag}") 41 | return continue_generate_data 42 | 43 | 44 | 45 | def setup_logger(name='Loong', level=logging.DEBUG): 46 | # create 47 | logger = logging.getLogger(name) 48 | logger.setLevel(level) 49 | 50 | # Avoid adding repeatedly 51 | if not logger.hasHandlers(): 52 | # log level 53 | console_handler = logging.StreamHandler() 54 | console_handler.setLevel(level) 55 | 56 | # color 57 | formatter = ColoredFormatter( 58 | '%(log_color)s%(asctime)s (%(name)s - %(levelname)s) %(message)s', 59 | datefmt='%Y-%m-%d %H:%M:%S', 60 | log_colors={ 61 | 'DEBUG': 'green', 62 | 'INFO': 'green', 63 | 
'WARNING': 'yellow', 64 | 'ERROR': 'red', 65 | 'CRITICAL': 'bold_red', 66 | } 67 | ) 68 | 69 | # 将格式设置到处理器 70 | console_handler.setFormatter(formatter) 71 | 72 | # 将处理器添加到记录器 73 | logger.addHandler(console_handler) 74 | 75 | return logger 76 | 77 | logger = setup_logger() -------------------------------------------------------------------------------- /Loong/src/vllm_example.sh: -------------------------------------------------------------------------------- 1 | # For Qwen2, you can enable the long-context capabilities by following these steps. 2 | # modify the config.json file by including the below snippet: 3 | """ 4 | { 5 | "architectures": [ 6 | "Qwen2ForCausalLM" 7 | ], 8 | // ... 9 | "vocab_size": 152064, 10 | 11 | // adding the following snippets 12 | "rope_scaling": { 13 | "factor": 4.0, 14 | "original_max_position_embeddings": 32768, 15 | "type": "yarn" 16 | } 17 | } 18 | """ 19 | # For details, refer to https://huggingface.co/Qwen/Qwen2-72B-Instruct. 20 | 21 | # python -m vllm.entrypoints.openai.api_server \ 22 | # --served-model-name Qwen2-72B-Instruct \ 23 | # --model "Your Checkpoint path" \ 24 | # --tensor-parallel-size=8 \ 25 | # --trust-remote-code 26 | 27 | python -m vllm.entrypoints.openai.api_server \ 28 | --served-model-name glm4-9b-1m \ 29 | --model "Your Checkpoint path" \ 30 | --tensor-parallel-size=8 \ 31 | --trust-remote-code -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # StructRAG 2 | StructRAG: Boosting Knowledge Intensive Reasoning of LLMs via Inference-time Hybrid Information Structurization 3 | 4 | https://arxiv.org/abs/2410.08815 5 | 6 | ## 0. Environment 7 | ``` 8 | python 3.8.19 9 | vllm 0.6.3.post1 10 | pip install -r requirements.txt 11 | ``` 12 | 13 | ## 1. Data Preparation 14 | ``` 15 | please follow Loong/README.md 16 | ``` 17 | 18 | ## 2. StructRAG Inference 19 | ```python 20 | # 1.
launch llm api server 21 | model_path = "/mnt/data/lizhuoqun/hf_models/Qwen2-72B-Instruct" 22 | CUDA_VISIBLE_DEVICES=0,1,2,3 && OUTLINES_CACHE_DIR=tmp && nohup python -m vllm.entrypoints.openai.api_server --model ${model_path} --served-model-name Qwen --tensor-parallel-size 4 --port 1225 --disable-custom-all-reduce > vllm.log 23 | # 2. run StructRAG 24 | python main.py --url {url_of_api_server} # output will be in ./eval_results/qwen/loong 25 | # 3. transform model output to Loong results format 26 | python do_merge_each_batch.py # results will be in ./Loong/output/qwen 27 | ``` 28 | 29 | ## 3. Results Evaluation 30 | ``` 31 | cd Loong/src && bash run.sh 32 | ``` 33 | 34 | ## 4. Router Training (optional) 35 | Qwen2-72B-Instruct has already achieved good routing performance under the few-shot examples setting. If you wish to further improve routing accuracy, we can train the 7B model using the DPO algorithm: 36 | ``` 37 | bash train_router/train.sh 38 | ``` 39 | 40 | After training, deploy the output model as an API using vllm, and obtain url_of_router.
When running StructRAG, use the following command: 41 | ``` 42 | python main.py --url {url_of_api_server} --router_url {url_of_router} 43 | ``` -------------------------------------------------------------------------------- /do_merge_each_batch.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | model_name = "qwen" 5 | git_hash = "" 6 | suffix = "" 7 | 8 | if os.path.exists(f"./Loong/output/{model_name}/loong_generate.jsonl"): 9 | raise ValueError(f"File already exists: ./Loong/output/{model_name}/loong_generate.jsonl") 10 | if os.path.exists(f"./Loong/output/{model_name}/loong_evaluate.jsonl"): 11 | raise ValueError(f"File already exists: ./Loong/output/{model_name}/loong_evaluate.jsonl") 12 | 13 | total_datas = [] 14 | 15 | dir_path = f"./eval_results{git_hash}/{model_name}/loong{suffix}" 16 | 17 | if os.path.exists(f"{dir_path}/final_output_0.jsonl"): 18 | a_s = [json.loads(line) for line in open(f"{dir_path}/final_output_0.jsonl")] 19 | print(len(a_s)) 20 | total_datas += a_s 21 | 22 | if os.path.exists(f"{dir_path}/final_output_1.jsonl"): 23 | b_s = [json.loads(line) for line in open(f"{dir_path}/final_output_1.jsonl")] 24 | print(len(b_s)) 25 | total_datas += b_s 26 | 27 | if os.path.exists(f"{dir_path}/final_output_2.jsonl"): 28 | c_s = [json.loads(line) for line in open(f"{dir_path}/final_output_2.jsonl")] 29 | print(len(c_s)) 30 | total_datas += c_s 31 | 32 | if os.path.exists(f"{dir_path}/final_output_3.jsonl"): 33 | d_s = [json.loads(line) for line in open(f"{dir_path}/final_output_3.jsonl")] 34 | print(len(d_s)) 35 | total_datas += d_s 36 | 37 | if os.path.exists(f"{dir_path}/final_output_4.jsonl"): 38 | e_s = [json.loads(line) for line in open(f"{dir_path}/final_output_4.jsonl")] 39 | print(len(e_s)) 40 | total_datas += e_s 41 | 42 | if os.path.exists(f"{dir_path}/final_output_5.jsonl"): 43 | f_s = [json.loads(line) for line in open(f"{dir_path}/final_output_5.jsonl")] 44 | 
print(len(f_s)) 45 | total_datas += f_s 46 | 47 | if os.path.exists(f"{dir_path}/final_output_6.jsonl"): 48 | g_s = [json.loads(line) for line in open(f"{dir_path}/final_output_6.jsonl")] 49 | print(len(g_s)) 50 | total_datas += g_s 51 | 52 | if os.path.exists(f"{dir_path}/final_output_7.jsonl"): 53 | h_s = [json.loads(line) for line in open(f"{dir_path}/final_output_7.jsonl")] 54 | print(len(h_s)) 55 | total_datas += h_s 56 | 57 | print("len(total_datas)", len(total_datas)) 58 | 59 | fw = open(f"./Loong/output/{model_name}/loong_generate.jsonl", "w") 60 | for t in total_datas: 61 | fw.write(json.dumps(t) + "\n") 62 | fw.close() -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import copy 4 | import time 5 | import tqdm 6 | import random 7 | random.seed(1024) 8 | import argparse 9 | 10 | from utils.qwenapi import QwenAPI 11 | 12 | from router import Router 13 | from structurizer import Structurizer 14 | from utilizer import Utilizer 15 | 16 | if __name__ == '__main__': 17 | 18 | parser = argparse.ArgumentParser() 19 | parser.add_argument("--llm_name", type=str, default="qwen") 20 | parser.add_argument("--dataset_name", type=str, default="loong") 21 | parser.add_argument("--url", type=str, default="10.32.15.63:1225") 22 | parser.add_argument("--router_url", type=str, default=None) 23 | parser.add_argument("--worker_id", type=int, choices=[0, 1, 2, 3, 4, 5, 6, 7], default=0) 24 | parser.add_argument("--start_bias", type=int, default=0) # used to manually skip last time error data 25 | parser.add_argument("--output_path_suffix", type=str, default="") 26 | args = parser.parse_args() 27 | 28 | for k, v in vars(args).items(): 29 | print(f"{k}: {v}") 30 | print('\nstart...') 31 | 32 | main_llm = QwenAPI(url=f"http://{args.url}/v1/chat/completions") 33 | if args.router_url is None: 34 | router_llm = 
QwenAPI(url=f"http://{args.url}/v1/chat/completions") 35 | else: 36 | router_llm = QwenAPI(url=f"http://{args.router_url}/v1/chat/completions") 37 | 38 | eval_data_path = "./Loong/data/loong_process.jsonl" 39 | eval_datas = [json.loads(l) for l in open(eval_data_path)] 40 | random.shuffle(eval_datas) 41 | eval_datas = eval_datas[200*args.worker_id+args.start_bias : 200*(args.worker_id+1)] 42 | print(f"len eval_datas: {len(eval_datas)}") 43 | 44 | intermediate_results_dir = f"./intermediate_results/{args.llm_name}/{args.dataset_name}{args.output_path_suffix}" 45 | os.makedirs(intermediate_results_dir) if not os.path.exists(intermediate_results_dir) else None 46 | 47 | chunk_kb_path = f"{intermediate_results_dir}/chunk_kb" 48 | graph_kb_path = f"{intermediate_results_dir}/graph_kb" 49 | table_kb_path = f"{intermediate_results_dir}/table_kb" 50 | algorithm_kb_path = f"{intermediate_results_dir}/algorithm_kb" 51 | catalogue_kb_path = f"{intermediate_results_dir}/catalogue_kb" 52 | os.makedirs(chunk_kb_path) if not os.path.exists(chunk_kb_path) else None 53 | os.makedirs(graph_kb_path) if not os.path.exists(graph_kb_path) else None 54 | os.makedirs(table_kb_path) if not os.path.exists(table_kb_path) else None 55 | os.makedirs(algorithm_kb_path) if not os.path.exists(algorithm_kb_path) else None 56 | os.makedirs(catalogue_kb_path) if not os.path.exists(catalogue_kb_path) else None 57 | 58 | output_dir = f"./eval_results/{args.llm_name}/{args.dataset_name}{args.output_path_suffix}" 59 | os.makedirs(output_dir) if not os.path.exists(output_dir) else None 60 | fw = open(f"{output_dir}/final_output_{args.worker_id}.jsonl", "a") 61 | fw_error = open(f"{output_dir}/final_output_error_{args.worker_id}.jsonl", "a") 62 | exiting_data = [json.loads(l) for l in open(f"{output_dir}/final_output_{args.worker_id}.jsonl")] 63 | exiting_data_ids = [d["id"] for d in exiting_data] 64 | 65 | router = Router(router_llm) 66 | structurizer = Structurizer(main_llm, chunk_kb_path, 
graph_kb_path, table_kb_path, algorithm_kb_path, catalogue_kb_path) 67 | utilizer = Utilizer(main_llm, chunk_kb_path, graph_kb_path, table_kb_path, algorithm_kb_path, catalogue_kb_path) 68 | 69 | for i, data in enumerate(eval_datas): # data: {"instruction": "", "question": "", "docs": "", "prompt_template": "{},{},{}"} 70 | if data["id"] in exiting_data_ids: 71 | print(f"################## Skipping {i}th data existing... ##################") 72 | continue 73 | print(f"################## Processing {i}th data... ##################") 74 | 75 | try: 76 | current_time = time.time() 77 | fw_intermediate = open(f"{intermediate_results_dir}/{data['id']}.jsonl", "w") 78 | 79 | query = data['prompt_template'].format(instruction=data['instruction'], question=data['question'], docs="......") 80 | _, titles = structurizer.split_content_and_tile(data['docs']) 81 | core_content = "The titles of the docs are: " + "\n".join(list(set(titles))) 82 | 83 | # 1. router 84 | chosen = router.do_route(query, core_content, data['id']) 85 | fw_intermediate.write(json.dumps({"query": query, "chosen": chosen}, ensure_ascii=False) + "\n") 86 | fw_intermediate.flush() 87 | 88 | # 2. structurizer 89 | instruction, kb_info = structurizer.construct(query, chosen, data['docs'], data['id']) 90 | fw_intermediate.write(json.dumps({"instruction": instruction, "kb_info": kb_info}, ensure_ascii=False) + "\n") 91 | fw_intermediate.flush() 92 | 93 | # 3. 
utilizer 94 | subqueries = utilizer.do_decompose(query, kb_info, data['id']) 95 | fw_intermediate.write(json.dumps({"subqueries": subqueries}, ensure_ascii=False) + "\n") 96 | fw_intermediate.flush() 97 | subknowledges = utilizer.do_extract(query, subqueries, chosen, data['id']) 98 | fw_intermediate.write(json.dumps({"subknowledges": subknowledges}, ensure_ascii=False) + "\n") 99 | fw_intermediate.flush() 100 | answer, _, _ = utilizer.do_merge(query, subqueries, subknowledges, chosen, data['id']) 101 | fw_intermediate.write(json.dumps({"answer": answer}, ensure_ascii=False) + "\n") 102 | fw_intermediate.flush() 103 | 104 | used_time = (time.time() - current_time) / 60 105 | print(f"level:{data['level']},set:{data['set']},type:{data['type']}") 106 | print(f"used time: {used_time:.2f} min") 107 | 108 | data['generate_response'] = answer 109 | data['used_time'] = used_time 110 | fw.write(json.dumps(data, ensure_ascii=False) + "\n") 111 | fw.flush() 112 | 113 | except Exception as e: 114 | print(f"(print in main.py) Error: {e}") 115 | data['generate_response'] = "meet error" 116 | data['used_time'] = -100 117 | fw_error.write(json.dumps(data, ensure_ascii=False) + "\n") 118 | fw_error.flush() 119 | 120 | print("all done") -------------------------------------------------------------------------------- /prompts/README.md: -------------------------------------------------------------------------------- 1 | # Prompts used in StructRAG 2 | 3 | **router.txt**: Used to guide the router to determine the optimal structure type 4 | 5 | **decompose.txt**: Used to guide the utility to decompose or rewrite the original complex problem 6 | 7 | **construct_table.txt**: Used to guide the constructor to build the original document as a table type knowledge 8 | 9 | **construct_graph.txt**: Used to guide the constructor to build the original document into graph type knowledge 10 | 11 | **construct_catalogue.txt**: Used to guide the constructor to build the original document into 
knowledge of the catalog type 12 | 13 | **constructor_algorithm.txt**: Used to guide the constructor to build the original document into algorithm type knowledge -------------------------------------------------------------------------------- /prompts/construct_algorithm.txt: -------------------------------------------------------------------------------- 1 | Instruction: 2 | Extract the required algorithmic pseudocode from Raw Content based on the requirements described in the Requirement 3 | It is required to follow the thinking method and output format in Examples, and each action in the pseudocode should be associated with specific information in the original document 4 | Note that if the requirements described in the Requirement cannot be solved in the form of pseudocode, do not forcefully write pseudocode. You can directly list the information that can solve the requirements 5 | 6 | Examples: 7 | ################# 8 | ################# 9 | Requirement: 10 | Given the document set: Intel CPU user manual, Huawei CPU user manual, Apple CPU user manual, Huawei monitor user manual, graphics card manual, host manual, fan manual, and more, extract the necessary algorithm pseudo-code for assembling a computer based on user-customized requirements. 11 | 12 | Raw Content: 13 | Intel CPU user manual: Our CPU features high performance, advanced multi-core processing, and power efficiency. 14 | Huawei CPU user manual: The Huawei CPU offers excellent thermal management and AI-powered multitasking. 15 | Apple CPU user manual: Apple CPUs are known for their efficiency in graphics rendering and seamless integration with macOS. 16 | Huawei monitor user manual: Huawei monitors offer high refresh rate (144Hz) and excellent color accuracy. 17 | Graphics card manual: The graphics card supports high resolution and fast processing for gaming. Models include: 18 | GTX 1650: Mid-range graphics card suitable for light gaming. 
19 | RTX 3060: High-performance card for gaming and content creation with ray tracing support and DLSS technology for improved performance. 20 | RTX 4090: Top-tier card for gaming at ultra settings and 4K resolution with ray tracing support and DLSS technology. 21 | Host manual: The host supports modular installation of various components with different color and material options. 22 | Options: Mid-tower, Full-tower, RGB lighting, black, white, or custom colors and materials (glass, metal, plastic). 23 | Fan manual: The fan provides optimal cooling for high-performance CPUs, available in different sizes: 24 | 120mm: Standard cooling for general use and light gaming applications with low noise levels .... 25 | 140mm: High-efficiency cooling for overclocked systems and high-performance gaming with moderate noise levels .... 26 | ....... 27 | 28 | Output: 29 | 1. Initialize components: 30 | - Intel_CPU = "Intel high performance multi-core CPU" 31 | - Huawei_CPU = "Huawei AI-powered multitasking CPU with thermal management" 32 | - Apple_CPU = "Apple efficient CPU with seamless macOS integration" 33 | - Huawei_Monitor = "Huawei 144Hz, color-accurate monitor" 34 | - Standard_Monitor = "Standard 60Hz monitor" 35 | - GTX_1650 = "GTX 1650 mid-range graphics card" 36 | - RTX_3060 = "RTX 3060 high-performance graphics card" 37 | - RTX_4090 = "RTX 4090 top-tier graphics card" 38 | - Host_Mid_Tower = "Mid-tower modular host" 39 | - Host_Full_Tower = "Full-tower modular host" 40 | - Host_RGB_Tower = "RGB-lit tower" 41 | - Fan_120mm = "120mm standard cooling fan" 42 | - Fan_140mm = "140mm high-efficiency fan" 43 | 2. 
Evaluate user requirements: 44 | if need high-performance CPU: 45 | - IF user needs high refresh rate display: 46 | - selected_CPU = Intel_CPU # According to original content, Intel CPU is high performance 47 | - selected_monitor = Huawei_Monitor # According to original content, Huawei monitor offers high refresh rate 48 | - IF user needs ultra-high graphics performance (4K gaming): # According to original content, RTX 4090 is top-tier card for 4K gaming 49 | - selected_GPU = RTX_4090 # According to original content,RTX 4090 is top-tier card for 4K gaming 50 | - install(selected_CPU) 51 | - install(selected_monitor) 52 | - install(selected_GPU) 53 | - install(Host_Full_Tower) 54 | - install(Fan_140mm) 55 | ELSE IF user needs solid gaming performance (1080p to 1440p): # According to original content, RTX 3060 is high-performance card for gaming 56 | - selected_GPU = RTX_3060 # According to original content, RTX 3060 is high-performance card for gaming 57 | - install(selected_CPU) 58 | - install(selected_monitor) 59 | - install(selected_GPU) 60 | - install(Host_Mid_Tower) 61 | - install(Fan_120mm) 62 | ELSE: 63 | - selected_GPU = GTX_1650 # According to original content, GTX 1650 is mid-range card suitable for light gaming 64 | - install(selected_CPU) 65 | - install(selected_monitor) 66 | - install(selected_GPU) 67 | - install(Host_Mid_Tower) 68 | - install(Fan_120mm) 69 | ELSE IF user prefers general high performance without high refresh rate: # According to original content, Huawei CPU offers thermal management and multitasking 70 | - selected_CPU = Intel_CPU # According to original content, Intel CPU is high performance 71 | - selected_monitor = Standard_Monitor # According to original content, standard monitor is 60Hz 72 | - selected_GPU = GTX_1650 73 | - install(selected_CPU) 74 | - install(selected_monitor) 75 | - install(selected_GPU) 76 | - install(Host_Mid_Tower) 77 | - install(Fan_120mm) 78 | ELSE IF need thermal management and multitasking CPU: # According 
to original content, Huawei CPU offers thermal management and multitasking 79 | - selected_CPU = Huawei_CPU # According to original content, Huawei CPU offers thermal management and multitasking 80 | IF user needs efficient graphics: # According to original content, RTX 3060 is high-performance card for gaming and content creation 81 | - selected_GPU = RTX_3060 # According to original content, RTX 3060 is high-performance card for gaming and content creation 82 | - install(selected_CPU) 83 | - install(selected_GPU) 84 | - install(Host_Mid_Tower) 85 | - install(Fan_140mm) 86 | ELSE: 87 | - selected_GPU = GTX_1650 # According to original content, GTX 1650 is mid-range card suitable for light gaming 88 | - install(selected_CPU) 89 | - install(selected_GPU) 90 | - install(Host_Mid_Tower) 91 | - install(Fan_120mm) 92 | ELSE IF need macOS integration and efficiency: # According to original content, Apple CPU is efficient with macOS integration 93 | - selected_CPU = Apple_CPU # According to original content, Apple CPU is efficient with macOS integration 94 | IF user needs efficient graphics rendering: # According to original content, RTX 3060 is high-performance card for gaming and content creation 95 | - selected_GPU = RTX_3060 # According to original content, RTX 3060 is high-performance card for gaming and content creation 96 | - install(selected_CPU) 97 | - install(selected_GPU) 98 | - install(Host_Mid_Tower) 99 | - install(Fan_120mm) 100 | ELSE: 101 | - selected_GPU = GTX_1650 # According to original content, GTX 1650 is mid-range card suitable for light gaming 102 | - install(selected_CPU) 103 | - install(selected_GPU) 104 | - install(Host_Mid_Tower) 105 | - install(Fan_120mm) 106 | 3. 
Evaluate aesthetics and color preferences: 107 | IF user prefers RGB lighting: # According to original content, RGB lighting is an option for the host 108 | - selected_host = Host_RGB_Tower 109 | ELSE IF user prefers a sleek design: # According to original content, black and white are color options for the host 110 | - selected_host = Host_Full_Tower 111 | ELSE: 112 | - selected_host = Host_Mid_Tower 113 | 4. Final system check: 114 | - power_on() 115 | - verify_CPU_performance() 116 | - verify_display_quality() 117 | - test_GPU_performance() 118 | - check_fan_speed_and_efficiency() 119 | ################# 120 | ################# 121 | 122 | Requirement: 123 | {requirement} 124 | 125 | Raw Content: 126 | {raw_content} 127 | 128 | Output: -------------------------------------------------------------------------------- /prompts/construct_catalogue.txt: -------------------------------------------------------------------------------- 1 | Instruction: 2 | Extract the required directory structure from Raw Content based on the requirements described in the Requirement, which is a hierarchical summary. The number of layers and the number of nodes in each layer are determined according to specific circumstances. 3 | Please follow the thinking style and output format in Examples, and note that each level of Summary needs to have a number to distinguish between different levels. And each summary needs to be very detailed. 4 | Note that you need to extract as much relevant information as possible from the Raw Content based on the entity names and person names mentioned in the Requirement, in order to build a complete directory structure. 5 | 6 | Examples: 7 | ################# 8 | ################# 9 | Requirement: 10 | Query is How do guests perceive the impact of privacy laws on technology development?, please extract relevant catalogues from the document based on the Query. 
11 | 12 | Raw Content: 13 | Episode 48 - Randall Munroe 14 | RANDALL MUNROE: Then fell into doing comics where I can spend all day diving into some rabbit hole, and then draw comics about it. And then the next day, move on to a different thing. I found a way to grab all the candy in the candy store. 15 | KEVIN SCOTT: Hi, everyone. Welcome to Behind the Tech. I’m your host, Kevin Scott, Chief Technology Officer for Microsoft. 16 | In this podcast, we’re going to get behind the tech. We’ll talk with some of the people who have made our modern tech world possible and understand what motivated them to create what they did. So, join me to maybe learn a little bit about the history of computing and get a few behind-the-scenes insights into what’s happening today. Stick around. 17 | CHRISTINA WARREN: Hello, and welcome to Behind the Tech. I’m Christina Warren, Senior Developer Advocate at GitHub. 18 | KEVIN SCOTT: And I’m Kevin Scott. 19 | CHRISTINA WARREN: And today, we have a super exciting guest with us, Randall Munroe. He’s famous for creating the webcomic xkcd 20 | KEVIN SCOTT: Yeah, it’s – Randall is maybe my favorite cartoonist. So I very rarely post cartoons onto social media; 100% of them are Randall’s stuff. 21 | CHRISTINA WARREN: 100%. 22 | KEVIN SCOTT: And so, once a month, once every other month, like he’ll write something that I just think is so fabulously funny that I have to share it with my other nerd friends. 23 | CHRISTINA WARREN: No, I mean, well – well that – well, that’s what makes the comic so good is – and – and you know, I’m interested to hear what you two talk about. But it’s such a – it covers such a wide spectrum of – of nerd-adjacent topics. And – and because the comic’s been going on so long, they’re literally – it’s like The Simpsons There literally is one for everything. You know, like there’s – there’s an xkcd that you can apply to any situation. 24 | ....... 25 | ....... 26 | ....... 
27 | 28 | Output: 29 | In question in requirement, the main topic is about the impact of privacy laws on technology development. Thus we extract "impact of privacy laws on technology development" related information from the raw content. And constructed a hierarchical summary based on the extracted information. 30 | 1. First-Level Summary 1: AI Technology and Regulatory Challenges 31 | • The podcast explores the complex relationship between AI advancements and existing legal frameworks, with a particular focus on privacy laws like HIPAA and how they interact with technological innovation. 32 | (1) Second-Level Summary 1: Regulatory Concerns in Financial Services 33 | • Ethan Mollick highlights concerns that the current regulatory environment in financial services is not well-suited to address the unique challenges posed by AI, particularly the uncertainty surrounding the applicability of existing regulations. 34 | • (a) Third-Level Summary 1: Innovation Hindered by Regulatory Ambiguity 35 | • Mollick discusses how the lack of clarity in regulations impedes the ability of industries, like finance, to fully harness the potential of AI technologies. 36 | • (b) Third-Level Summary 1: Need for Adaptive Regulations 37 | • He advocates for a more dynamic and responsive regulatory framework that can evolve alongside technological advancements, ensuring both safety and innovation. 38 | (2) Second-Level Summary 2: AI in Healthcare and Privacy Concerns 39 | • The podcast also delves into the intersection of AI experimentation in healthcare and the need to comply with privacy regulations like HIPAA. 40 | • (a) Third-Level Summary 2: Balancing Privacy and AI Benefits 41 | • Discussions emphasize the challenge of ensuring privacy while leveraging AI to improve healthcare systems and access to medical services. 
42 | • (b) Third-Level Summary 2: Ethical Considerations in AI Use 43 | • Mollick touches on concerns over AI misuse, such as “data rape,” and underscores the importance of regulating AI to promote positive outcomes while preventing harmful practices. 44 | 2. First-Level Summary 2: The Call for Responsive AI Regulation 45 | • Mollick and other guests advocate for a regulatory approach that allows for experimentation and innovation, particularly in areas like healthcare, while mitigating potential risks. 46 | (1) Second-Level Summary 1: The Need for Smart and Responsive Regulation 47 | • Mollick calls for a “fast, smart, responsive regulation” that monitors emerging harms in AI and carves out space for experimentation in critical sectors like medicine. 48 | • (a) Third-Level Summary 1: Evolving with Technological Advancements 49 | • He stresses that regulations must evolve as quickly as the technology itself to ensure they are effective in addressing both the opportunities and risks associated with AI. 50 | (2) Second-Level Summary 2: AI as a General-Purpose Technology 51 | • The conversation highlights the far-reaching implications of AI, recognizing it as a general-purpose technology with the potential to significantly impact various sectors. 52 | • (a) Third-Level Summary 2: Promoting Innovation While Protecting Rights 53 | • Experts argue that while privacy laws are crucial to prevent misuse, they must also be flexible enough to allow for innovation, ensuring AI’s positive potential is not stifled. 54 | • (b) Third-Level Summary 2: The Need for Balance 55 | • The guests suggest that a balanced approach to regulation is necessary, one that promotes innovation while protecting individual rights and societal interests. 56 | 3. 
First-Level Summary 3: Conclusion on the Future of AI Regulation 57 | • The episode concludes with a call for a balanced regulatory framework that can adapt to the evolving nature of AI, ensuring that both privacy and innovation are protected. 58 | (1) Second-Level Summary 1: Regulatory Agility for AI’s Future 59 | • Experts emphasize that regulations must be agile enough to keep pace with AI developments, ensuring that the technology can be used safely while minimizing potential harms. 60 | • (a) Third-Level Summary 1: Agility in Regulation 61 | • The need for regulatory frameworks that evolve in tandem with technological advancements is underscored as a key factor in supporting AI’s positive societal impact. 62 | ################# 63 | ################# 64 | 65 | Requirement: 66 | {requirement} 67 | 68 | Raw Content: 69 | {raw_content} 70 | 71 | Output: -------------------------------------------------------------------------------- /prompts/construct_graph.txt: -------------------------------------------------------------------------------- 1 | Instruction: 2 | Extract the required triplets from Raw Content according to the requirements described in the Requirement 3 | The output of a triplet is in the format of {{'head': '...', 'relation': '...', 'tail': ['...', '...']}}. 4 | Note that not all triples in the text need to be extracted. You need to analyze the relationships and entities mentioned in the Requirement and only extract the relevant triples 5 | Note that the head and tail you output should be kept as complete as possible. They may not be just a word or phrase, but can also be a sentence or a paragraph of text. Try to be consistent with the original text and do not make any abbreviations. 
6 | 7 | Examples: 8 | ################# 9 | ################# 10 | Requirement: 11 | It is necessary to construct a graph based on a given document, where the entity is the title of the paper, the relationship is a reference, and the title of the given document is used as the head, while the titles of other papers are used as the tail 12 | 13 | Noting: 14 | You only need to consider the following paper titles, 15 | Generative AI and Large Language Models for Cyber Security: All Insights You Need 16 | WHEN LLMs MEET CYberSECURITY: A SYStEMATIC LITERATURE REVIEW 17 | Can Large Language Models Be an Alternative to Human Evaluations? 18 | LLM4Vuln: A Unified Evaluation Framework for Decoupling and Enhancing LLMs' Vulnerability Reasoning 19 | Why Can GPT Learn In-Context? Language Models Implicitly Perform Gradient Descent as Meta-Optimizers 20 | 21 | Raw Content: 22 | # Generative AI and Large Language Models for Cyber Security: All Insights You Need 23 | Mohamed Amine Ferrag, Fatima Alwahedi, Ammar Battah, Bilel Cherif, Abdechakour Mechri,
and Norbert Tihanyi 24 | #### Abstract 25 | The rapid evolution of cyber threats requires innovative approaches to enhance cybersecurity defenses. In this paper, 26 | Index Terms-Generative AI, LLM, Transformer, Security, Cyber Security. 27 | M. A. Ferrag is the corresponding author. 28 | ## LIST OF ABBREVIATIONS 29 | AI Artificial Intelligence 30 | ## I. INTRODUCTION 31 | The history of Natural Language Processing (NLP) dates back to the 1950s when the Turing test was developed. However, NLP has seen significant advancements in 32 | [141] ZySec-AI, "Zysec-ai: Project zysec," Webpage, accessed: 2024-05-01. [Online]. Available: https://github.com/ZySec-AI/project-zysec 33 | [205] M. Bhatt, S. Chennabasappa, C. Nikolaidis, S. Wan, I. Evtimov, D. Gabi, D. Song, F. Ahmad, C. Aschermann, L. Fontana et al., "Purple llama cyberseceval: A secure coding benchmark for language models," arXiv preprint arXiv:2312.04724, 2023. 34 | [206] Z. Liu, "Secqa: A concise question-answering dataset for evaluating large language models in computer security," arXiv preprint arXiv:2312.15838, 2023. 35 | [207] M. Bhatt, S. Chennabasappa, Y. Li, C. Nikolaidis, D. Song, S. Wan, F. Ahmad, C. Aschermann, Y. Chen, D. Kapil, D. Molnar, S. Whitman, and J. Saxe, "Cyberseceval 2: A wide-ranging cybersecurity evaluation suite for large language models," 2024. 36 | [208] N. Li, A. Pan, A. Gopal, S. Yue, D. Berrios, A. Gatti, J. D. Li, A.K. Dombrowski, S. Goel, L. Phan et al., "The wmdp benchmark: Measuring and reducing malicious use with unlearning," arXiv preprint arXiv:2403.03218, 2024. 37 | [209] Y. Sun, D. Wu, Y. Xue, H. Liu, W. Ma, L. Zhang, M. Shi, and Y. Liu, "Llm4vuln: A unified evaluation framework for decoupling and enhancing llms\' vulnerability reasoning," 2024. 38 | [210] Z. Liu, J. Shi, and J. F. Buford, "Cyberbench: A multi-task benchmark for evaluating large language models in cybersecurity." [Online]. 
Available: http://aics.site/AICS2024/AICS_CyberBench.pdf 39 | 40 | Output: 41 | Among the paper titles that need to be considered, "Generative AI and Large Language Models for Cyber Security: All Insights You Need" is the title of the given document, so it should be used as the head. Among the other paper titles that need to be considered, "Llm4vuln: A unified evaluation framework for decoupling and enhancing llms \'vulnerability reasoning" appears in the reference of the given document, so it should be used as the tail. The remaining paper titles that need to be considered do not appear in the given document, so they are not considered. 42 | {{"head": "Generative AI and Large Language Models for Cyber Security: All Insights You Need", "relation": "reference", "tail": ["Llm4vuln: A unified evaluation framework for decoupling and enhancing llms\' vulnerability reasoning"]}} 43 | ################# 44 | ################# 45 | 46 | Requirement: 47 | It is necessary to construct a graph based on a given document, where the entity is the title of the paper, the relationship is a reference, and the title of the given document is used as the head, while the titles of other papers are used as the tail 48 | 49 | Noting: 50 | You only need to consider the following paper titles, 51 | Generative AI and Large Language Models for Cyber Security: All Insights You Need 52 | WHEN LLMs MEET CYberSECURITY: A SYStEMATIC LITERATURE REVIEW 53 | Can Large Language Models Be an Alternative to Human Evaluations? 54 | LLM4Vuln: A Unified Evaluation Framework for Decoupling and Enhancing LLMs' Vulnerability Reasoning 55 | Why Can GPT Learn In-Context? Language Models Implicitly Perform Gradient Descent as Meta-Optimizers 56 | 57 | Raw Content: 58 | # LLM4Vuln: A Unified Evaluation Framework for Decoupling and Enhancing LLMs\' Vulnerability Reasoning 59 | Daoyuan $\\mathrm{{Wu}}^{{*}}$
Nanyang Technological University
Singapore, Singapore
daoyuan.wu@ntu.edu.sg
Wei Ma
Nanyang Technological University
Singapore, Singapore
ma_wei@ntu.edu.sg 60 | Yue Xue
MetaTrust Labs
Singapore, Singapore
xueyue@metatrust.io
Lyuye Zhang
Nanyang Technological University
Singapore, Singapore
zh0004ye@e.ntu.edu.sg 61 | Miaolei Shi
MetaTrust Labs
Singapore, Singapore
stan@metatrust.io 62 | Yang Liu
Nanyang Technological University
Singapore, Singapore
yangliu@ntu.edu.sg 63 | #### Absract 64 | Large language models (LLMs) have demonstrated significant potential for many downstream tasks, including those requiring humanlevel intelligence, such as vulnerability detection. However, recent attempts to use LLMs for vulnerability detection are still preliminary, as they lack an in-depth understanding of a subject LLM\'s vulnerability reasoning capability - whether it originates from the model itself or from external assistance, such as invoking tool support and retrieving vulnerability knowledge. 65 | ## REFERENCES 66 | [1] 2023. Ethereum Whitepaper. https://ethereum.org/whitepaper 67 | [2] 2023. Solidity Programming Language. https://soliditylang.org 68 | [21] Yizheng Chen, Zhoujie Ding, Lamya Alowain, Xinyun Chen, and David Wagner 2023. DiverseVul: A New Vulnerable Source Code Dataset for Deep Learning Based Vulnerability Detection. In Proceedings of the 26th International Symposium on Research in Attacks, Intrusions and Defenses. ACM, Hong Kong China, 654-668. https://doi.org/10.1145/3607199.3607242 69 | [22] Cheng-Han Chiang and Hung-yi Lee. 2023. Can Large Language Models Be an Alternative to Human Evaluations?. In Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), Anna Rogers, Jordan Boyd-Graber, and Naoaki Okazaki (Eds.). Association for Computational Linguistics, Toronto, Canada, 15607-15631. https://doi.org/10.18653/v1/2023.acllong. 870 70 | [23] Damai Dai, Yutao Sun, Li Dong, Yaru Hao, Shuming Ma, Zhifang Sui, and Furu Wei 2023. Why Can GPT Learn In-Context? Language Models Implicitly Perform Gradient Descent as Meta-Optimizers. arXiv:2212.10559 (May 2023). https //doi.org/10.48550/arXiv.2212.10559 arXiv:2212.10559 [cs]. 
71 | 72 | Output: 73 | Among the paper titles that need to be considered, 'LLM4Vuln: A Unified Evaluation Framework for Decoupling and Enhancing LLMs' Vulnerability Reasoning' is the title of the given document, so it should be used as the head. Among the other paper titles that need to be considered, 'Why Can GPT Learn In Context?'? Language Models Implicitly Perform Gradient Descent as Meta Optizers "and" Can Large Language Models Be an Alternative to Human Evaluations? "Appear in the references of the given document, so they should be considered as tails. The remaining paper titles that need to be considered are not included in the given document, so they are not considered. 74 | {{"head": "LLM4Vuln: A Unified Evaluation Framework for Decoupling and Enhancing LLMs\' Vulnerability Reasoning", "relation": "reference", "tail": ["Why Can GPT Learn In-Context? Language Models Implicitly Perform Gradient Descent as Meta-Optimizers.", "Can Large Language Models Be an Alternative to Human Evaluations?"]}} 75 | ################# 76 | ################# 77 | 78 | Requirement: 79 | {requirement} 80 | 81 | Noting: 82 | You only need to consider the following paper titles, 83 | {titles} 84 | 85 | Raw Content: 86 | {raw_content} 87 | 88 | Output: -------------------------------------------------------------------------------- /prompts/construct_table.txt: -------------------------------------------------------------------------------- 1 | Instruction: 2 | Extract complete relevant tables from Raw Content based on the requirements described in the Requirement. 3 | Note that when building a table, it is important to retain the table title and source information, such as which company and report the table comes from. 4 | 5 | Hints: 6 | Firstly, identify the keywords in the Requirement, including entity names and attribute names, and then extract them from the Raw Content based on these keywords. 
7 | If the Raw Content does not contain the information required by the Requirement, then extract the small amount of information most relevant to the Requirement from the Raw Content 8 | 3. When analyzing Requirements and extracting Raw Content, do not translate; maintain the original language 9 | 10 | Raw Content: 11 | {content} 12 | 13 | Requirement: 14 | {instruction} 15 | 16 | Output: -------------------------------------------------------------------------------- /prompts/decompose.txt: -------------------------------------------------------------------------------- 1 | Instruction: 2 | In order to solve complex document-related problems, you need to break down the given Query into multiple relatively simple and independent sub-problems. 3 | 4 | Requirement: 5 | 1. Doc Info is a description of the document information. You can reference this information to implement the breakdown. 6 | 2. If the given problem is already simple enough or there is no way to break it down, then no breakdown is needed. 7 | 3. Follow the output format and thought process in the Examples, and do not provide any additional explanatory notes. 8 | 9 | Examples: 10 | ################# 11 | ################# 12 | Doc Info: 13 | The titles of the docs are: "Judgment Document 7"\n"Judgment Document 3"\n"Judgment Document 2"\n"Judgment Document 4"\n"Judgment Document 6"\n"Judgment Document 8"\n"Judgment Document 5"\n"Judgment Document 1" 14 | 15 | Query: 16 | ... Please read the above judgment documents and classify all the judgment documents according to the following 6 types of case causes: 'Property Dispute', 'Administrative Entity - Labor and Social Security Administration (Labor, Social Security)', 'Cause for Execution - Administrative Non-Litigation Execution', 'Corruption and Bribery', 'Cause for Execution - Other Causes', and 'Administrative Action - Administrative Payment'. 
You only need to output the title of each judgment document, following the format below with titles as per the judgment documents provided:\n{{'Property Dispute': ['Judgment Document a', 'Judgment Document b'], 'Administrative Entity - Labor and Social Security Administration (Labor, Social Security)': ['Judgment Document a', 'Judgment Document b'], 'Cause for Execution - Administrative Non-Litigation Execution': ['Judgment Document a', 'Judgment Document b'], 'Corruption and Bribery': ['Judgment Document a', 'Judgment Document b'], 'Cause for Execution - Other Causes': ['Judgment Document a', 'Judgment Document b'], 'Administrative Action - Administrative Payment': ['Judgment Document a', 'Judgment Document b']}}\nYour answer is: 17 | 18 | Output: 19 | determine whether the cause of each given judgment document is 'Property Dispute', 20 | determine whether the cause of each given judgment document is 'Administrative Entity - Labor and Social Security Administration (Labor, Social Security)', 21 | determine whether the cause of each given judgment document is 'Cause for Execution - Administrative Non-Litigation Execution', 22 | determine whether the cause of each given judgment document is 'Corruption and Bribery', 23 | determine whether the cause of each given judgment document is 'Cause for Execution - Other Causes', 24 | determine whether the cause of each given judgment document is 'Administrative Action - Administrative Payment'. 25 | 26 | ################# 27 | Doc Info: 28 | The titles of the docs are: "Judgment Document 7"\n"Judgment Document 3"\n"Judgment Document 1"\n"Judgment Document 2"\n"Judgment Document 4"\n"Judgment Document 5"\n\n"Judgment Document 6" 29 | 30 | Query: 31 | ...... Based solely on the judgment documents seen above, answer the following question.\n\nRead the above judgment documents, and I will provide you with several judgment results: {{\'Judgment Result 1\': \'1. 
Uphold the decision of the Huainan Xiejiaji District People's Court of Anhui Province 20....\', \'Judgment Result 2\': \'Dismiss the appeal and uphold the original judgment.\\nThe second-instance case acceptance fee is 13,800 yuan,....', \'Judgment Result 3\': \'Uphold the Civil Judgement No. 4850 (2020) of the Tianjin Third Intermediate People's Court.\\nThis judgment is final.\', \'Judgment Result 4\': \'Dismiss the appeal and uphold the original judgment.\\nThe second-instance case acceptance fee is 50 yuan, paid by the appellant Li (prepaid by the appellant).\\nThis judgment is final.\'}}.\nYou need to determine which judgment result is most likely for each of the above judgment documents. Please output in the following JSON format:\n{{"Judgment Document 1":"Judgment Result a", "Judgment Document 2":"Judgment Result b", "Judgment Document 3":"Judgment Result c"}}\nOnly provide the judgment document titles and the judgment result numbers, not the specific content.\nPlease directly provide the answer: 32 | 33 | Output: 34 | I will provide you with several judgment results: {{\'Judgment Result 1\': \'1. Uphold the decision of the Huainan Xiejiaji District People's Court of Anhui Province 20....\', \'Judgment Result 2\': \'Dismiss the appeal and uphold the original judgment.\\nThe second-instance case acceptance fee is 13,800 yuan,....', \'Judgment Result 3\': \'Uphold the Civil Judgement No. 4850 (2020) of the Tianjin Third Intermediate People's Court.\\nThis judgment is final.\', \'Judgment Result 4\': \'Dismiss the appeal and uphold the original judgment.\\nThe second-instance case acceptance fee is 50 yuan, paid by the appellant Li (prepaid by the appellant).\\nThis judgment is final.\'}}.\nYou need to determine which judgment result is most likely for the given judgment document. 35 | 36 | ################# 37 | Doc Info: 38 | The titles of the docs are: "2020 Report"\n"2025 Fourth Quarter Report" 39 | 40 | Query: 41 | ... 
Please answer which company has the highest 'Total Owner's Equity'? 42 | 43 | Output: 44 | Identify the company with the highest 'Total Owner's Equity'. 45 | 46 | ################# 47 | Doc Info: 48 | The titles of the docs are: "WHEN LLMs MEET CYberSECURITY: A SYStEMATIC LITERATURE REVIEW"\n"Generative AI and Large Language Models for Cyber Security: All Insights You Need"\n"LLM4Vuln: A Unified Evaluation Framework for Decoupling and Enhancing LLMs' Vulnerability Reasoning" 49 | 50 | Query: 51 | ...Paper: OpenMoE: An Early Effort on Open Mixture-of-Experts Language Models. "instruction": We hope you will carefully study the provided papers and determine the citation relationships between them...... 52 | 53 | Output: 54 | Find the required triples with entities as the given papers and the relationship as citation. 55 | 56 | ################# 57 | ################# 58 | 59 | Doc Info: 60 | {kb_info} 61 | 62 | Query: 63 | {query} 64 | 65 | Output: -------------------------------------------------------------------------------- /prompts/route.txt: -------------------------------------------------------------------------------- 1 | Instruction: 2 | To generate answers to questions based on documents, I need to structure the documents as a table, graph, or chunk. 3 | Generally speaking, statistical questions prefer tables, chain reasoning questions prefer graphs, and single-hop QA questions prefer chunks. 4 | Now, given the following document information and question, please determine which type of knowledge structure I should use. 5 | Simply output one of the three words: table, graph, or chunk, without providing any further explanation. 6 | 7 | Examples: 8 | ================= 9 | ================= 10 | Doc Info: 11 | "2024 Financial Report" 12 | 13 | Query: 14 | ......What is the amount of Mango Excellent Media's trading financial assets? 
Please read the financial statements of the above-mentioned companies and answer the following questions based only on the content seen above. You can ignore parts related to financial reports that are not mentioned in the questions and only answer the other parts. 15 | 16 | Output: 17 | table 18 | 19 | ================= 20 | Doc Info: 21 | "Judgment Document 1" "Judgment Document 2" "Judgment Document 3" "Judgment Document 4" "Judgment Document 5" 22 | 23 | Query: 24 | ......Which judgment document among the above has the cause of action as 'Administrative Action - Administrative Registration'? Please answer the question based only on the judgment documents seen above, and simply provide the title of the judgment document that meets the criteria. Based solely on the content seen in the judgment documents, I will give you several judgment results: {{'Judgment Result 1': '1. Uphold the decision of the Xiejiaji District People’s Court of Huainan City, Anhui Province (20....', 'Judgment Result 2': 'Dismiss the appeal and uphold the original judgment.\nThe second-instance case acceptance fee is 13,800 yuan,....', 'Judgment Result 3': 'Uphold the civil judgment of the Third Intermediate People’s Court of Tianjin (2020) Jin 03 Min Zhong 4850 No.\nThis judgment is final.', 'Judgment Result 4': 'Dismiss the appeal and uphold the original judgment.\nThe second-instance case acceptance fee is 50 yuan, to be borne by the appellant Li (the appellant has prepaid).\nThis judgment is final.'}}. You need to determine which judgment result is the most likely for all the above judgment documents. Please output in the following JSON format: {{"Judgment Document 1": "Judgment Result a", "Judgment Document 2": "Judgment Result b", "Judgment Document 3": "Judgment Result c"}} Only provide the judgment document titles and judgment result numbers; specific content output is not required. 
25 | 26 | Output: 27 | chunk 28 | 29 | ================= 30 | Doc Info: 31 | Patchscopes: A Unifying Framework for Inspecting Hidden Representations of Language Models\n Llama: Efficient Permutation Importance Sampling\n Vicuna: Visual Contextualization and Navigation for Large-scale Codebases 32 | 33 | Query: 34 | '#Papers:\n......\n\nWe hope you will carefully study the provided papers and determine the citation relationships between them. Please follow the instructions below strictly to complete the task:\n\n#Specific Requirements:\n1. Reference: When a given paper mentions other provided papers, those other papers are considered as "references" for the given paper. To summarize in this specific context, references are about what the given paper is using.\n2. Citation: Conversely, when other provided papers mention the given paper in their works, the given paper is being "cited" by those other papers. To summarize in this specific context, citations are about who is using the given paper.\n3. Given a paper, you need to determine the citation or reference relationship between this paper and the other papers. Do not consider papers that are not provided.\n3. Please present the paper titles in a json format as follows: {{"Reference":["Reference Title 1", "Reference Title 2", ..., "Reference Title n"], "Citation":["Citation Title 1", "Citation Title 2", ..., "Citation Title n"]}}.\n4. 
If a paper does not have any references or citations, please leave the corresponding list empty, e.g.{{"Refernce":[]}}, {{"Citation":[]}}.\n\n#The paper you need to analyze:\nPatchscopes: A Unifying Framework for Inspecting Hidden Representations of Language Models' 35 | 36 | Output: 37 | graph 38 | ================= 39 | ================= 40 | 41 | Doc Info: 42 | {titles} 43 | 44 | Query: 45 | {query} 46 | 47 | Output: -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate==0.31.0 2 | aiohttp==3.9.5 3 | aiosignal==1.3.1 4 | annotated-types==0.7.0 5 | anthropic==0.30.1 6 | anyio==4.4.0 7 | async-timeout==4.0.3 8 | attrs==23.2.0 9 | beautifulsoup4==4.12.3 10 | certifi==2024.6.2 11 | charset-normalizer==3.3.2 12 | click==8.1.7 13 | colorlog==6.8.2 14 | dashscope==1.20.1 15 | datasets==2.21.0 16 | deepspeed==0.15.0 17 | dill==0.3.8 18 | distro==1.9.0 19 | docker-pycreds==0.4.0 20 | docstring_parser==0.16 21 | eval_type_backport==0.2.0 22 | exceptiongroup==1.2.1 23 | faiss-gpu==1.7.2 24 | filelock==3.15.4 25 | frozenlist==1.4.1 26 | fsspec==2024.5.0 27 | gitdb==4.0.11 28 | GitPython==3.1.43 29 | google==3.0.0 30 | gritlm==1.0.0 31 | h11==0.14.0 32 | hjson==3.1.0 33 | httpcore==1.0.5 34 | httpx==0.27.0 35 | huggingface-hub==0.23.4 36 | idna==3.7 37 | Jinja2==3.1.4 38 | jiter==0.5.0 39 | joblib==1.4.2 40 | jsonlines==4.0.0 41 | logger==1.4 42 | markdown-it-py==3.0.0 43 | MarkupSafe==2.1.5 44 | mdurl==0.1.2 45 | modelscope==1.20.1 46 | mpmath==1.3.0 47 | mteb==1.12.54 48 | multidict==6.0.5 49 | multiprocess==0.70.16 50 | networkx==3.1 51 | ninja==1.11.1.1 52 | nltk==3.8.1 53 | numpy==1.24.4 54 | nvidia-cublas-cu12==12.1.3.1 55 | nvidia-cuda-cupti-cu12==12.1.105 56 | nvidia-cuda-nvrtc-cu12==12.1.105 57 | nvidia-cuda-runtime-cu12==12.1.105 58 | nvidia-cudnn-cu12==8.9.2.26 59 | nvidia-cufft-cu12==11.0.2.54 60 | 
class Router:
    """Routes a query to the most suitable knowledge-structure type."""

    # Candidate structure types, checked in the same priority order as the
    # original if/elif chain (first match wins).
    _CANDIDATES = ("table", "graph", "algorithm", "catalogue")

    def __init__(self, llm):
        # llm: any object exposing `response(prompt) -> str`.
        self.llm = llm

    def do_route(self, query, core_content, data_id):
        """Ask the LLM which structure fits `query`; default to 'chunk'.

        Args:
            query: the user query to route.
            core_content: document titles/summary inserted into the prompt.
            data_id: identifier used only for progress logging.

        Returns:
            One of 'table', 'graph', 'algorithm', 'catalogue', 'chunk'.
        """
        print(f"data_id: {data_id}, do_route...")

        # Context manager closes the prompt file promptly; the original
        # `open(...).read()` left the handle to the garbage collector.
        with open("prompts/route.txt", "r") as f:
            raw_prompt = f.read()

        prompt = raw_prompt.format(
            query=query,
            titles=core_content
        )
        # Normalize once instead of calling .lower() in every branch.
        output = self.llm.response(prompt).lower()

        # First keyword found in the LLM output wins; plain chunks otherwise.
        return next((c for c in self._CANDIDATES if c in output), "chunk")
class Structurizer:
    """Builds a typed knowledge base (graph / table / algorithm / catalogue /
    chunk) from raw concatenated documents and persists each KB as a JSON
    shard keyed by data_id."""

    def __init__(self, llm, chunk_kb_path, graph_kb_path, table_kb_path, algorithm_kb_path, catalogue_kb_path):
        # llm: any object exposing `response(prompt) -> str`.
        # *_kb_path: directories where per-data_id JSON shards are written.
        self.llm = llm
        self.chunk_kb_path = chunk_kb_path
        self.graph_kb_path = graph_kb_path
        self.table_kb_path = table_kb_path
        self.algorithm_kb_path = algorithm_kb_path
        self.catalogue_kb_path = catalogue_kb_path

    @staticmethod
    def _read_prompt(path):
        # Read a prompt template; the original `open(...).read()` never
        # closed the file handle.
        with open(path, "r") as f:
            return f.read()

    @staticmethod
    def _dump_json(path, data):
        # Persist one KB shard; the original `json.dump(data, open(path, "w"))`
        # also leaked the handle.
        with open(path, "w") as f:
            json.dump(data, f, ensure_ascii=False, indent=4)

    def construct(self, query, chosen, docs, data_id):
        """Dispatch to the builder selected by `chosen`.

        Returns:
            (instruction, info) — the instruction handed to the builder and a
            short textual summary of the constructed KB.
        Raises:
            ValueError: for an unknown `chosen` value.
        """
        print(f"data_id: {data_id}, construct...")

        if chosen == "graph":
            instruction = f"Based on the given document, construct a graph where entities are the titles of papers and the relation is 'reference', using the given document title as the head and other paper titles as tails."
            info_of_graph = self.do_construct_graph(instruction, docs, data_id)
            return instruction, info_of_graph
        elif chosen == "table":
            instruction = f"Query is {query}, please extract relevant complete tables from the document based on the attributes and keywords mentioned in the Query. Note: retain table titles and source information."
            info_of_table = self.do_construct_table(instruction, docs, data_id)
            return instruction, info_of_table
        elif chosen == "algorithm":
            instruction = f"Query is {query}, please extract relevant algorithms from the document based on the Query."
            info_of_algorithm = self.do_construct_algorithm(instruction, docs, data_id)
            return instruction, info_of_algorithm
        elif chosen == "catalogue":
            instruction = f"Query is {query}, please extract relevant catalogues from the document based on the Query."
            info_of_catalogue = self.do_construct_catalogue(instruction, docs, data_id)
            return instruction, info_of_catalogue
        elif chosen == "chunk":
            instruction = "construct chunk"
            info_of_chunk = self.do_construct_chunk(instruction, docs, data_id)
            return instruction, info_of_chunk
        else:
            raise ValueError("chosen should be in ['graph', 'table', 'algorithm', 'catalogue', 'chunk']")

    def do_construct_graph(self, instruction, docs, data_id):
        """Build one reference graph per document; return a short summary."""
        print(f"data_id: {data_id}, do_construct_graph...")
        docs, titles = self.split_content_and_tile(docs)

        graphs = []
        info_of_graph = ""
        raw_prompt = self._read_prompt("prompts/construct_graph.txt")
        for d, doc in enumerate(docs):
            print(f"data_id: {data_id}, do_construct_graph... in doc {d}/{len(docs)} in docs ..")
            title = doc['title']
            content = doc['document']

            prompt = raw_prompt.format(
                requirement=instruction,
                raw_content=content,
                titles="\n".join(titles)
            )
            output = self.llm.response(prompt)
            # Summary keeps only the first output line, capped at 128 chars.
            info_of_graph += output.split("\n")[0][:128]
            graphs.append(f"{title}: {output}")

        self._dump_json(f"{self.graph_kb_path}/data_{data_id}.json", graphs)

        return info_of_graph

    def do_construct_table(self, instruction, docs, data_id):
        """Extract query-relevant tables per document; return a short summary."""
        print(f"data_id: {data_id}, do_construct_table...")
        docs, titles = self.split_content_and_tile(docs)

        tables = []
        info_of_table = ""
        raw_prompt = self._read_prompt("prompts/construct_table.txt")
        for d, doc in enumerate(docs):
            print(f"data_id: {data_id}, do_construct_table... in doc {d}/{len(docs)} in docs ..")
            title = doc['title']
            content = doc['document']
            prompt = raw_prompt.format(
                instruction=instruction,
                content=content
            )
            output = self.llm.response(prompt)
            info_of_table += output.split("\n")[0][:128]
            tables.append(f"{title}: {output}")

        self._dump_json(f"{self.table_kb_path}/data_{data_id}.json", tables)

        return info_of_table

    def do_construct_chunk(self, instruction, docs, data_id):
        """Store each document verbatim as a chunk; return the joined titles."""
        print(f"data_id: {data_id}, do_construct_chunk...")
        docs, titles = self.split_content_and_tile(docs)

        # No LLM call here: chunks are the raw documents prefixed by title.
        chunks = [f"{doc['title']}: {doc['document']}" for doc in docs]

        self._dump_json(f"{self.chunk_kb_path}/data_{data_id}.json", chunks)

        info_of_chunk = " ".join(titles)
        return info_of_chunk

    def do_construct_algorithm(self, instruction, docs, data_id):
        """Extract query-relevant algorithms per document; return a summary."""
        print(f"data_id: {data_id}, do_construct_algorithm...")
        docs, titles = self.split_content_and_tile(docs)

        algorithms = []
        info_of_algorithm = ""
        raw_prompt = self._read_prompt("prompts/construct_algorithm.txt")
        for d, doc in enumerate(docs):
            print(f"data_id: {data_id}, do_construct_algorithm... in doc {d}/{len(docs)} in docs ..")
            title = doc['title']
            content = doc['document']
            prompt = raw_prompt.format(
                requirement=instruction,
                raw_content=content
            )
            output = self.llm.response(prompt)
            info_of_algorithm += output.split("\n")[0][:128]
            algorithms.append(f"{title}: {output}")

        self._dump_json(f"{self.algorithm_kb_path}/data_{data_id}.json", algorithms)

        return info_of_algorithm

    def do_construct_catalogue(self, instruction, docs, data_id):
        """Extract catalogues per document; return a short summary.

        Bug fix: the original ran `instruction.split("Query:\\n")[1]`
        unconditionally, but construct() builds instructions that contain no
        "Query:\\n" marker, so the index always raised IndexError. The prefix
        is now stripped only when the marker is actually present.
        """
        print(f"data_id: {data_id}, do_construct_catalogue...")
        docs, titles = self.split_content_and_tile(docs)

        if "Query:\n" in instruction:
            instruction = instruction.split("Query:\n")[1]

        catalogues = []
        info_of_catalogue = ""
        raw_prompt = self._read_prompt("prompts/construct_catalogue.txt")
        for d, doc in enumerate(docs):
            print(f"data_id: {data_id}, do_construct_catalogue... in doc {d}/{len(docs)} in docs ..")
            title = doc['title']
            document = doc['document']

            # Placeholder for future chunking of very long documents; today
            # the whole document is processed as a single piece of content.
            # (The unused `len_document` local was removed.)
            contents = [document]

            for c, content in enumerate(contents):
                print(f"data_id: {data_id}, do_construct_catalogue... in doc {d}/{len(docs)} in docs .. in content {c}/{len(contents)} in contents ..")
                prompt = raw_prompt.format(
                    requirement=instruction,
                    raw_content=content
                )
                output = self.llm.response(prompt)
                info_of_catalogue += output.split("\n")[0][:128]
                catalogues.append(f"\n\n{title}: {output}")

        self._dump_json(f"{self.catalogue_kb_path}/data_{data_id}.json", catalogues)

        return info_of_catalogue

    def split_content_and_tile(self, docs_):
        """Split a concatenated document string into per-doc dicts and titles.

        Documents are delimited with <标题起始符> (title start) and
        <标题终止符> (title end) markers. NOTE: "tile" in the method name is a
        historic typo for "title"; kept for interface compatibility.
        """
        docs = []
        titles = []

        # str.strip with a multi-char argument strips a *character set*; kept
        # as-is since titles/content are not expected to start with marker chars.
        raw_doc_list = docs_.strip("<标题起始符>").split("<标题起始符>")

        for raw_doc in raw_doc_list:
            title = raw_doc.split('<标题终止符>')[0].strip()
            content = raw_doc.split('<标题终止符>')[1].strip()

            docs.append({'title': title, 'document': content})
            titles.append(title)

        return docs, titles
offload_optimizer_device: none 6 | offload_param_device: none 7 | zero3_init_flag: false 8 | zero_stage: 2 9 | distributed_type: DEEPSPEED 10 | downcast_bf16: 'no' 11 | machine_rank: 0 12 | main_training_function: main 13 | mixed_precision: 'bf16' 14 | num_machines: 1 15 | num_processes: 8 16 | rdzv_backend: static 17 | same_network: true 18 | tpu_env: [] 19 | tpu_use_cluster: false 20 | tpu_use_sudo: false 21 | use_cpu: false 22 | -------------------------------------------------------------------------------- /train_router/accelerate_configs/deepspeed_zero3.yaml: -------------------------------------------------------------------------------- 1 | compute_environment: LOCAL_MACHINE 2 | debug: false 3 | deepspeed_config: 4 | deepspeed_multinode_launcher: standard 5 | offload_optimizer_device: none 6 | offload_param_device: none 7 | zero3_init_flag: true 8 | zero3_save_16bit_model: true 9 | zero_stage: 3 10 | distributed_type: DEEPSPEED 11 | downcast_bf16: 'no' 12 | machine_rank: 0 13 | main_training_function: main 14 | mixed_precision: bf16 15 | num_machines: 1 16 | num_processes: 8 17 | rdzv_backend: static 18 | same_network: true 19 | tpu_env: [] 20 | tpu_use_cluster: false 21 | tpu_use_sudo: false 22 | use_cpu: false 23 | -------------------------------------------------------------------------------- /train_router/accelerate_configs/fsdp_qlora.yaml: -------------------------------------------------------------------------------- 1 | compute_environment: LOCAL_MACHINE 2 | debug: false 3 | distributed_type: FSDP 4 | downcast_bf16: 'no' 5 | fsdp_config: 6 | fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP 7 | fsdp_backward_prefetch: BACKWARD_PRE 8 | fsdp_cpu_ram_efficient_loading: true 9 | fsdp_forward_prefetch: false 10 | fsdp_offload_params: true 11 | fsdp_sharding_strategy: FULL_SHARD 12 | fsdp_state_dict_type: SHARDED_STATE_DICT 13 | fsdp_sync_module_states: true 14 | fsdp_use_orig_params: false 15 | machine_rank: 0 16 | main_training_function: main 17 | 
# flake8: noqa
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
# regular:
python examples/scripts/dpo.py \
    --dataset_name=trl-internal-testing/hh-rlhf-helpful-base-trl-style \
    --model_name_or_path=gpt2 \
    --per_device_train_batch_size 4 \
    --learning_rate 1e-3 \
    --gradient_accumulation_steps 1 \
    --logging_steps 10 \
    --eval_steps 500 \
    --output_dir="dpo_anthropic_hh" \
    --warmup_steps 150 \
    --report_to wandb \
    --bf16 \
    --logging_first_step \
    --no_remove_unused_columns

# peft:
python examples/scripts/dpo.py \
    --dataset_name=trl-internal-testing/hh-rlhf-helpful-base-trl-style \
    --model_name_or_path=gpt2 \
    --per_device_train_batch_size 4 \
    --learning_rate 1e-3 \
    --gradient_accumulation_steps 1 \
    --logging_steps 10 \
    --eval_steps 500 \
    --output_dir="dpo_anthropic_hh" \
    --optim rmsprop \
    --warmup_steps 150 \
    --report_to wandb \
    --bf16 \
    --logging_first_step \
    --no_remove_unused_columns \
    --use_peft \
    --lora_r=16 \
    --lora_alpha=16
"""

import logging
import multiprocessing
import os
# Environment overrides must be set before the HF/trl imports below read
# them: route hub traffic through the mirror and disable wandb reporting.
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
os.environ["WANDB_DISABLED"] = "true"
from contextlib import nullcontext

from trl.commands.cli_utils import DPOScriptArguments, init_zero_verbose, TrlParser
from trl.env_utils import strtobool

# Opt into rich console output via the TRL_USE_RICH env var (off by default).
TRL_USE_RICH = strtobool(os.getenv("TRL_USE_RICH", "0"))

if TRL_USE_RICH:
    init_zero_verbose()
    FORMAT = "%(message)s"

    from rich.console import Console
    from rich.logging import RichHandler

import torch
from datasets import load_dataset, load_from_disk
from transformers import AutoModelForCausalLM, AutoTokenizer
from accelerate import PartialState
from trl import (
    DPOConfig,
    DPOTrainer,
    ModelConfig,
    RichProgressCallback,
    get_kbit_device_map,
    get_peft_config,
    get_quantization_config,
)


if TRL_USE_RICH:
    logging.basicConfig(format=FORMAT, datefmt="[%X]", handlers=[RichHandler()], level=logging.INFO)


if __name__ == "__main__":
    # Parse CLI flags / config file into script, training, and model configs.
    parser = TrlParser((DPOScriptArguments, DPOConfig, ModelConfig))
    args, training_args, model_config = parser.parse_args_and_config()

    # show all arguments
    print(args)
    print(training_args)
    print(model_config)

    # Force use our print callback
    if TRL_USE_RICH:
        training_args.disable_tqdm = True
        console = Console()

    ################
    # Model & Tokenizer
    ################
    # Resolve the requested dtype; "auto"/None are passed through unchanged.
    torch_dtype = (
        model_config.torch_dtype
        if model_config.torch_dtype in ["auto", None]
        else getattr(torch, model_config.torch_dtype)
    )
    quantization_config = get_quantization_config(model_config)
    model_kwargs = dict(
        revision=model_config.model_revision,
        attn_implementation=model_config.attn_implementation,
        torch_dtype=torch_dtype,
        # KV cache is incompatible with gradient checkpointing.
        use_cache=False if training_args.gradient_checkpointing else True,
        device_map=get_kbit_device_map() if quantization_config is not None else None,
        quantization_config=quantization_config,
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_config.model_name_or_path, trust_remote_code=model_config.trust_remote_code, **model_kwargs
    )
    peft_config = get_peft_config(model_config)
    if peft_config is None:
        # Without PEFT, DPO needs a frozen reference copy of the policy model.
        ref_model = AutoModelForCausalLM.from_pretrained(
            model_config.model_name_or_path, trust_remote_code=model_config.trust_remote_code, **model_kwargs
        )
    else:
        # With PEFT the base weights double as the implicit reference model.
        ref_model = None
    tokenizer = AutoTokenizer.from_pretrained(
        model_config.model_name_or_path, trust_remote_code=model_config.trust_remote_code
    )
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    if tokenizer.chat_template is None:
        tokenizer.chat_template = "{% for message in messages %}{{message['role'] + ': ' + message['content'] + '\n\n'}}{% endfor %}{{ eos_token }}"
    if tokenizer.bos_token is None:  # Qwen has no bos_token; it must be set here, otherwise DPO training raises an error.
        tokenizer.add_special_tokens({"bos_token": tokenizer.eos_token})
        tokenizer.bos_token_id = tokenizer.eos_token_id
    if args.ignore_bias_buffers:
        # torch distributed hack
        model._ddp_params_and_buffers_to_ignore = [
            name for name, buffer in model.named_buffers() if buffer.dtype == torch.bool
        ]

    ################
    # Optional rich context managers
    ###############
    init_context = nullcontext() if not TRL_USE_RICH else console.status("[bold green]Initializing the DPOTrainer...")
    save_context = (
        nullcontext()
        if not TRL_USE_RICH
        else console.status(f"[bold green]Training completed! Saving the model to {training_args.output_dir}")
    )

    ################
    # Dataset
    ################
    if "hh-rlhf-helpful-base-trl-style" in args.dataset_name:  # official case
        ds = load_dataset(args.dataset_name)
        # ds = load_from_disk(args.dataset_name)
    else:
        # Local dataset directory expected to contain train.json / test.json.
        data_files = {"train": f"{args.dataset_name}/train.json", "test": f"{args.dataset_name}/test.json"}
        ds = load_dataset("json", data_files=data_files)
        # ds = load_from_disk(args.dataset_name)
    if args.sanity_check:
        # Tiny subset for a quick end-to-end smoke run.
        for key in ds:
            ds[key] = ds[key].select(range(500))

    def process(row):
        # Render chat-formatted text: everything except the last turn becomes
        # the prompt; the final chosen/rejected turns are the two completions.
        row["prompt"] = tokenizer.apply_chat_template(row["chosen"][:-1], tokenize=False)
        row["chosen"] = tokenizer.apply_chat_template([row["chosen"][-1]], tokenize=False)
        row["rejected"] = tokenizer.apply_chat_template([row["rejected"][-1]], tokenize=False)
        return row

    # Compute that only on the main process for faster data processing.
    # see: https://github.com/huggingface/trl/pull/1255
    with PartialState().local_main_process_first():
        ds = ds.map(process, num_proc=training_args.dataset_num_proc)

    train_dataset = ds[args.dataset_train_split]
    eval_dataset = ds[args.dataset_test_split]

    ################
    # Training
    ################
    with init_context:
        trainer = DPOTrainer(
            model,
            ref_model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=eval_dataset,
            tokenizer=tokenizer,
            peft_config=peft_config,
            callbacks=[RichProgressCallback] if TRL_USE_RICH else None,
        )

    trainer.train()

    with save_context:
        trainer.save_model(training_args.output_dir)
none \ 38 | --bf16 \ 39 | --logging_first_step \ 40 | --max_prompt_length 512 \ 41 | --max_length 512 \ 42 | --no_remove_unused_columns > log/train_log/${tag}.log 2>&1 43 | 44 | echo "Done." 45 | -------------------------------------------------------------------------------- /utilizer.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | 4 | class Utilizer(): 5 | def __init__(self, llm, chunk_kb_path, graph_kb_path, table_kb_path, algorithm_kb_path, catalogue_kb_path): 6 | self.llm = llm 7 | self.chunk_kb_path = chunk_kb_path 8 | self.graph_kb_path = graph_kb_path 9 | self.table_kb_path = table_kb_path 10 | self.algorithm_kb_path = algorithm_kb_path 11 | self.catalogue_kb_path = catalogue_kb_path 12 | 13 | def do_decompose(self, query, kb_info, data_id): 14 | print(f"data_id: {data_id}, do_decompose...") 15 | 16 | raw_prompt = open("prompts/decompose.txt", "r").read() 17 | prompt = raw_prompt.format( 18 | query=query, 19 | kb_info=kb_info 20 | ) 21 | output = self.llm.response(prompt) 22 | subqueries = output.split("\n") 23 | 24 | return subqueries 25 | 26 | def do_extract(self, query, subqueries, chosen, data_id, extra_instruction=None): 27 | print(f"data_id: {data_id}, extraction...") 28 | 29 | if extra_instruction != None: 30 | subqueries = [subquery + extra_instruction for subquery in subqueries] 31 | 32 | if chosen == "chunk": 33 | subknowledges = self.do_extract_chunk(query, subqueries, data_id) 34 | elif chosen == "table": 35 | subknowledges = self.do_extract_table(query, subqueries, data_id) 36 | elif chosen == "graph": 37 | subknowledges = self.do_extract_graph(query, subqueries, data_id) 38 | elif chosen == "algorithm": 39 | subknowledges = self.do_extract_algorithm(query, subqueries, data_id) 40 | elif chosen == "catalogue": 41 | subknowledges = self.do_extract_catalogue(query, subqueries, data_id) 42 | else: 43 | raise ValueError("chosen should be in ['chunk', 'table', 'graph', 'algorithm', 
class Utilizer():
    """Answers a query against a previously constructed knowledge base:
    decompose the query into subqueries, extract evidence per structure
    type, then merge the retrieved pieces into one answer."""

    def __init__(self, llm, chunk_kb_path, graph_kb_path, table_kb_path, algorithm_kb_path, catalogue_kb_path):
        # llm: any object exposing `response(prompt) -> str`.
        # *_kb_path: directories holding the per-data_id JSON KB shards.
        self.llm = llm
        self.chunk_kb_path = chunk_kb_path
        self.graph_kb_path = graph_kb_path
        self.table_kb_path = table_kb_path
        self.algorithm_kb_path = algorithm_kb_path
        self.catalogue_kb_path = catalogue_kb_path

    @staticmethod
    def _load_kb(path):
        # Load one KB shard; the original `json.load(open(...))` never
        # closed the file handle.
        with open(path) as f:
            return json.load(f)

    def do_decompose(self, query, kb_info, data_id):
        """Split `query` into subqueries (one per LLM output line)."""
        print(f"data_id: {data_id}, do_decompose...")

        with open("prompts/decompose.txt", "r") as f:
            raw_prompt = f.read()
        prompt = raw_prompt.format(
            query=query,
            kb_info=kb_info
        )
        output = self.llm.response(prompt)
        subqueries = output.split("\n")

        return subqueries

    def do_extract(self, query, subqueries, chosen, data_id, extra_instruction=None):
        """Dispatch evidence extraction to the handler for `chosen`.

        Raises:
            ValueError: for an unknown `chosen` value.
        """
        print(f"data_id: {data_id}, extraction...")

        if extra_instruction is not None:  # fixed: identity check, not `!= None`
            subqueries = [subquery + extra_instruction for subquery in subqueries]

        # Dict dispatch replaces the original if/elif chain.
        extractors = {
            "chunk": self.do_extract_chunk,
            "table": self.do_extract_table,
            "graph": self.do_extract_graph,
            "algorithm": self.do_extract_algorithm,
            "catalogue": self.do_extract_catalogue,
        }
        if chosen not in extractors:
            raise ValueError("chosen should be in ['chunk', 'table', 'graph', 'algorithm', 'catalogue']")

        return extractors[chosen](query, subqueries, data_id)

    def do_extract_chunk(self, query, subqueries, data_id):
        """Query every stored chunk with all subqueries joined together."""
        chunks = self._load_kb(f"{self.chunk_kb_path}/data_{data_id}.json")

        composed_query = "\n".join(subqueries)

        subknowledges = []
        for c, chunk in enumerate(chunks):
            print(f"retrieve chunk {c}/{len(chunks)} in chunks ..")

            prompt = f"Instruction:\nAnswer the Query based on the given Document.\n\nQuery:\n{composed_query}\n\nDocument:\n{chunk}\n\nOutput:"
            tmp_output = self.llm.response(prompt)
            # Chunks are stored as "title: content", so the title is the
            # prefix before the first colon.
            title = chunk.split(":")[0]
            subknowledges.append(f"Retrieval result for {title}: {tmp_output}")

        return subknowledges

    def do_extract_table(self, query, subqueries, data_id):
        """Filter the stored tables against each subquery."""
        print(f"data_id: {data_id}, do_extract_table...")

        tables = self._load_kb(f"{self.table_kb_path}/data_{data_id}.json")
        tables_content = ""
        for t, table in enumerate(tables):
            tables_content += f"Table {t+1}:\n{table}\n\n"

        subknowledges = []
        for s, subquery in enumerate(subqueries):
            print(f"data_id: {data_id}, do_extract_table... in subquery {s}/{len(subqueries)} in subqueries ..")
            prompt = f"Instruction:\nThe following Tables show multiple independent tables built from multiple documents.\nFilter these tables according to the query, retaining only the table information that helps answer the query.\nNote that you need to analyze the attributes and entities mentioned in the query and filter accordingly.\nThe information needed to answer the query must exist in one or several tables, and you need to check these tables one by one.\n\nTables:{tables_content}\n\nQuery:{subquery}\n\nOutput:"
            retrieval = self.llm.response(prompt)
            subknowledges.append(retrieval)

        return subknowledges

    def do_extract_graph(self, query, subqueries, data_id):
        """Filter the stored graph triples against each subquery."""
        print(f"data_id: {data_id}, do_extract_graph...")

        graphs = self._load_kb(f"{self.graph_kb_path}/data_{data_id}.json")
        graphs_content = "\n\n".join(graphs)

        subknowledges = []
        for s, subquery in enumerate(subqueries):
            print(f"data_id: {data_id}, do_extract_graph... in subquery {s}/{len(subqueries)} in subqueries ..")
            prompt = f"Instruction: According to the query, filter out the triples from all triples in the graph that can help answer the query.\nNote, carefully analyze the entities and relationships mentioned in the query and filter based on this information.\n\nGraphs:{graphs_content}\n\nQuery:{subquery}\n\nOutput:"
            retrieval = self.llm.response(prompt)
            subknowledges.append(retrieval)

        return subknowledges

    def do_extract_algorithm(self, query, subqueries, data_id):
        """Filter the stored algorithm descriptions against each subquery."""
        print(f"data_id: {data_id}, do_extract_algorithm...")

        algorithms = self._load_kb(f"{self.algorithm_kb_path}/data_{data_id}.json")
        algorithms_content = "\n\n".join(algorithms)

        subknowledges = []
        for s, subquery in enumerate(subqueries):
            print(f"data_id: {data_id}, do_extract_algorithm... in subquery {s}/{len(subqueries)} in subqueries ..")
            prompt = f"Instruction: According to the query, filter out information from algorithm descriptions that can help answer the query.\nNote, carefully analyze the entities and relationships mentioned in the query and filter based on this information.\n\nAlgorithms:{algorithms_content}\n\nQuery:{subquery}\n\nOutput:"
            retrieval = self.llm.response(prompt)
            subknowledges.append(retrieval)

        return subknowledges

    def do_extract_catalogue(self, query, subqueries, data_id):
        """Filter the stored catalogues against each subquery."""
        print(f"data_id: {data_id}, do_extract_catalogue...")

        catalogues = self._load_kb(f"{self.catalogue_kb_path}/data_{data_id}.json")
        catalogues_content = "\n\n".join(catalogues)

        subknowledges = []
        for s, subquery in enumerate(subqueries):
            print(f"data_id: {data_id}, do_extract_catalogue... in subquery {s}/{len(subqueries)} in subqueries ..")
            prompt = f"Instruction: According to the query, filter out information from the catalogue that can help answer the query.\nNote, carefully analyze the entities and relationships mentioned in the query and filter based on this information.\n\nCatalogues:{catalogues_content}\n\nQuery:{subquery}\n\nOutput:"
            retrieval = self.llm.response(prompt)
            subknowledges.append(retrieval)

        return subknowledges

    def do_merge(self, query, subqueries, subknowledges, chosen, data_id):
        """Combine the retrieved evidence and ask the LLM for a final answer.

        Returns:
            (answer, decision, new_query) — decision/new_query are fixed
            "No" placeholders (no iterative retrieval yet).
        Raises:
            ValueError: for an unknown `chosen` value.
        """
        print(f"data_id: {data_id}, do_merge...")

        retrieval_of_chunk = ""
        retrieval_of_graph = ""
        retrieval_of_table = ""
        retrieval_of_algorithm = ""
        retrieval_of_catalogue = ""

        # chunk/catalogue results were produced per-document for the whole
        # query, so they are joined under the original query; the other types
        # pair each subquery with its own retrieval.
        if chosen == "chunk":
            subknowledges = "\n".join(subknowledges)
            retrieval_of_chunk += f"Subquery: {query}\nRetrieval results:\n{subknowledges}\n\n"
        elif chosen == "table":
            for subquery, subknowledge in zip(subqueries, subknowledges):
                retrieval_of_table += f"Subquery: {subquery}\nRetrieval results:\n{subknowledge}\n\n"
        elif chosen == "graph":
            for subquery, subknowledge in zip(subqueries, subknowledges):
                retrieval_of_graph += f"Subquery: {subquery}\nRetrieval results:\n{subknowledge}\n\n"
        elif chosen == "algorithm":
            for subquery, subknowledge in zip(subqueries, subknowledges):
                retrieval_of_algorithm += f"Subquery: {subquery}\nRetrieval results:\n{subknowledge}\n\n"
        elif chosen == "catalogue":
            subknowledges = "\n".join(subknowledges)
            retrieval_of_catalogue += f"Subquery: {query}\nRetrieval results:\n{subknowledges}\n\n"
        else:
            raise ValueError("chosen should be in ['chunk', 'table', 'graph', 'algorithm', 'catalogue']")

        decision = "No"
        new_query = "No"
        instruction = "1. Answer the Question based on retrieval results. \n2. Find the relevant information from given retrieval results and output as detailed, specific, and lengthy as possible. \n3. The output must be a coherent and smooth piece of text."
        prompt = f"Instruction:\n{instruction}\n\nQuestion:\n{query}\n\nRetrieval:\n{retrieval_of_chunk}{retrieval_of_graph}{retrieval_of_table}{retrieval_of_algorithm}{retrieval_of_catalogue}"

        answer = self.llm.response(prompt)

        return answer, decision, new_query
import time
import json
import requests
import os
from transformers import AutoTokenizer


class QwenAPI():
    """Thin HTTP client for a Qwen model server.

    NOTE(review): assumes the endpoint speaks an OpenAI-compatible
    chat/completions JSON protocol — confirm against the serving stack.
    """

    def __init__(self, url):
        # url: full endpoint URL of the model server.
        self.url = url

        # A gpt2 tokenizer is used only to *estimate* prompt length in tokens;
        # it is not the Qwen tokenizer, so counts are approximate.
        print("loading tokenizer")
        if os.path.exists("/mnt/data/lizhuoqun/hf_models/gpt2"):
            self.tokenizer = AutoTokenizer.from_pretrained("/mnt/data/lizhuoqun/hf_models/gpt2")
        else:
            raise Exception("No model path found")
        print("loading tokenizer done")

    def response(self, input_text, max_new_tokens=4096):
        """Send `input_text` as a single user message; return the reply text.

        Retries up to 3 times; truncates the prompt proportionally (by
        characters) when it exceeds the ~128k-token context window.
        Raises Exception when no response is obtained after all retries.
        """
        current_time = time.time()

        input_text_len = len(self.tokenizer(input_text)['input_ids'])
        print(f"input_text_len: {input_text_len}")
        if input_text_len > 128000:
            # Character-level proportional truncation to roughly 128k tokens.
            print(f"input_text_len: {input_text_len}", "we reduce the input_text_len")
            input_text = input_text[:int(len(input_text)*(128000/input_text_len))]

        url = self.url
        headers = {
            # "Content-Type": "application/json",
            "Authorization": "EMPTY"
        }
        raw_info = {
            "model": "Qwen",
            "messages": [{"role": "user", "content": input_text}],
            # Fixed seed for reproducible generations.
            "seed": 1024,
            "max_tokens": max_new_tokens
        }

        data = json.dumps(raw_info)
        # print(data)

        try_time = 0
        response = None
        while try_time < 3:
            try_time += 1

            try:
                # (connect, read) timeouts in seconds — effectively unbounded.
                callback = requests.post(url, headers=headers, data=data, timeout=(10000, 10000))
                print("callback.status_code", callback.status_code)
                print(f"prompt_tokens: {callback.json()['usage']['prompt_tokens']}, total_tokens: {callback.json()['usage']['total_tokens']}, completion_tokens: {callback.json()['usage']['completion_tokens']}")
            except Exception as e:
                # Network/parse failure: retry from the top.
                print(f"(print in qwenapi.py callback, try_time {try_time}) Error: {e}")
                continue

            try:
                result = callback.json()
                # print(result)
                # print(result.keys())
                response = result['choices'][0]['message']['content']
                # print(response)
                # input()
                break
            except Exception as e:
                print(f"(print in qwenapi.py response, try_time {try_time}) callback: {callback.json()} Error: {e}")
                # Server-side length error: parse the reported token count out
                # of the error message, shrink the prompt accordingly, retry.
                if "Please reduce the length of the messages" in callback.json()['message']:
                    current_tokne_len = callback.json()['message'].split("However, you requested")[1].split("tokens in the messages, Please")[0].strip()
                    current_tokne_len = int(current_tokne_len)
                    print(f"current_tokne_len: {current_tokne_len}")
                    raw_info = {
                        "model": "Qwen",
                        "messages": [{"role": "user", "content": input_text[:int(len(input_text)*(128000/current_tokne_len))]}],
                        "seed": 1024,
                        "max_tokens": max_new_tokens
                    }
                    data = json.dumps(raw_info)
                    continue

        if response is None:
            raise Exception(f"response is None")

        print("used time in this qwenapi:", (time.time()-current_time)/60, "min")
        return response