├── .gitignore ├── Loong ├── LICENSE ├── README.md ├── requirements.txt └── src │ ├── run.sh │ ├── step1_load_data.py │ ├── step2_model_generate.py │ ├── step3_model_evaluate.py │ ├── step4_cal_metric.py │ ├── test.sh │ ├── tmp.json │ ├── utils │ ├── args.py │ ├── config.py │ ├── generate.py │ ├── metric.py │ ├── prompt.py │ ├── token_length.py │ └── util.py │ └── vllm_example.sh ├── README.md ├── do_merge_each_batch.py ├── main.py ├── prompts ├── README.md ├── construct_algorithm.txt ├── construct_catalogue.txt ├── construct_graph.txt ├── construct_table.txt ├── decompose.txt └── route.txt ├── requirements.txt ├── router.py ├── structurizer.py ├── train_router ├── accelerate_configs │ ├── deepspeed_zero1.yaml │ ├── deepspeed_zero2.yaml │ ├── deepspeed_zero3.yaml │ ├── fsdp_qlora.yaml │ ├── multi_gpu.yaml │ └── single_gpu.yaml ├── data │ ├── test.json │ └── train.json ├── dpo.py └── train.sh ├── utilizer.py └── utils └── qwenapi.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | eval_results 3 | intermediate_results 4 | Loong/assets/logo.png 5 | Loong/assets/main_fig.jpg 6 | Loong/config/models/claude3haiku.yaml 7 | Loong/config/models/claude35sonnet.yaml 8 | Loong/config/models/geminipro.yaml 9 | Loong/config/models/glm4.yaml 10 | Loong/config/models/gpt4.yaml 11 | Loong/config/models/gpt4o.yaml 12 | Loong/config/models/qwen2.yaml 13 | Loong/data/loong_process.jsonl 14 | Loong/data/loong.jsonl 15 | Loong/data/doc/README.md 16 | Loong/data/doc/financial/2019-avni123118form10k.txt 17 | Loong/data/doc/financial/2020-avni123119form10k.txt 18 | Loong/data/doc/financial/2020-f10k2019_boxscorebrands.txt 19 | Loong/data/doc/financial/2020-form10-k.txt 20 | Loong/data/doc/financial/2021-avni123120form10k.txt 21 | Loong/data/doc/financial/2021-f10k2020_boxscorebrands.txt 22 | Loong/data/doc/financial/2021-form10-k.txt 23 | Loong/data/doc/financial/2022-aqb-20211231x10k.txt 24 | 
Loong/data/doc/financial/2022-avni123121form10k.txt 25 | Loong/data/doc/financial/2022-f10k2021_boxscorebrands.txt 26 | Loong/data/doc/financial/2022-form10-k.txt 27 | Loong/data/doc/financial/2022-synl-20211231.txt 28 | Loong/data/doc/financial/2023-acnt-20221231.txt 29 | Loong/data/doc/financial/2023-Aetherium Acquisition Corp-n.txt 30 | Loong/data/doc/financial/2023-AMERIGUARD SECURITY SERVICES, INC.-n.txt 31 | Loong/data/doc/financial/2023-aqb-20221231x10k.txt 32 | Loong/data/doc/financial/2023-avni123123form10k.txt 33 | Loong/data/doc/financial/2023-f10k2022_boxscore.txt 34 | Loong/data/doc/financial/2023-form10-k.txt 35 | Loong/data/doc/financial/2024-1st FRANKLIN FINANCIAL CORP-j.txt 36 | Loong/data/doc/financial/2024-4Front Ventures Corp.-j.txt 37 | Loong/data/doc/financial/2024-1847 Holdings LLC-j.txt 38 | Loong/data/doc/financial/2024-acnt-20231231.txt 39 | Loong/data/doc/financial/2024-Acorda Therapeutics, Inc.-j.txt 40 | Loong/data/doc/financial/2024-Acutus Medical, Inc.-j.txt 41 | Loong/data/doc/financial/2024-ADIAL PHARMACEUTICALS, INC.-j.txt 42 | Loong/data/doc/financial/2024-Aditxt, Inc.-j.txt 43 | Loong/data/doc/financial/2024-ADM ENDEAVORS, INC.-j.txt 44 | Loong/data/doc/financial/2024-Aetherium Acquisition Corp-n.txt 45 | Loong/data/doc/financial/2024-Agape ATP Corp-j.txt 46 | Loong/data/doc/financial/2024-AgEagle Aerial Systems Inc.-j.txt 47 | Loong/data/doc/financial/2024-AGRIFORCE GROWING SYSTEMS LTD.-j.txt 48 | Loong/data/doc/financial/2024-Agrify Corp-j.txt 49 | Loong/data/doc/financial/2024-Aileron Therapeutics, Inc.-j.txt 50 | Loong/data/doc/financial/2024-AIM ImmunoTech Inc.-j.txt 51 | Loong/data/doc/financial/2024-Alaunos Therapeutics, Inc.-j.txt 52 | Loong/data/doc/financial/2024-Alpha Investment Inc.-j.txt 53 | Loong/data/doc/financial/2024-Alset Inc.-j.txt 54 | Loong/data/doc/financial/2024-Ameri Metro, Inc. 
(formerly Yellowwood)-j.txt 55 | Loong/data/doc/financial/2024-AMERICAN BATTERY MATERIALS, INC.-j.txt 56 | Loong/data/doc/financial/2024-American Resources Corp-j.txt 57 | Loong/data/doc/financial/2024-AMERICAN SHARED HOSPITAL SERVICES-j.txt 58 | Loong/data/doc/financial/2024-American Strategic Investment Co.-j.txt 59 | Loong/data/doc/financial/2024-AMERIGUARD SECURITY SERVICES, INC.-j.txt 60 | Loong/data/doc/financial/2024-AMERIGUARD SECURITY SERVICES, INC.-n.txt 61 | Loong/data/doc/financial/2024-Ameritek Ventures, Inc.-j.txt 62 | Loong/data/doc/financial/2024-Apple iSports Group, Inc.-j.txt 63 | Loong/data/doc/financial/2024-Appsoft Technologies, Inc.-j.txt 64 | Loong/data/doc/financial/2024-AppTech Payments Corp.-j.txt 65 | Loong/data/doc/financial/2024-APPYEA, INC-j.txt 66 | Loong/data/doc/financial/2024-aqb-20231231x10k.txt 67 | Loong/data/doc/financial/2024-AQUABOUNTY TECHNOLOGIES INC-j.txt 68 | Loong/data/doc/financial/2024-Arax Holdings Corp-j.txt 69 | Loong/data/doc/financial/2024-Arena Group Holdings, Inc.-j.txt 70 | Loong/data/doc/financial/2024-ARVANA INC-j.txt 71 | Loong/data/doc/financial/2024-ASCENT INDUSTRIES CO.-j.txt 72 | Loong/data/doc/financial/2024-Aspira Women's Health Inc.-j.txt 73 | Loong/data/doc/financial/2024-AST SpaceMobile, Inc.-j.txt 74 | Loong/data/doc/financial/2024-ATOSSA THERAPEUTICS, INC.-j.txt 75 | Loong/data/doc/financial/2024-AUDDIA INC.-j.txt 76 | Loong/data/doc/financial/2024-Ault Alliance, Inc.-j.txt 77 | Loong/data/doc/financial/2024-avni123123form10k.txt 78 | Loong/data/doc/financial/2024-AXIM BIOTECHNOLOGIES, INC.-j.txt 79 | Loong/data/doc/financial/2024-B. 
Riley Financial, Inc.-j.txt 80 | Loong/data/doc/financial/2024-Backblaze, Inc.-j.txt 81 | Loong/data/doc/financial/2024-Balance Labs, Inc.-j.txt 82 | Loong/data/doc/financial/2024-BASANITE, INC.-j.txt 83 | Loong/data/doc/financial/2024-BATTALION OIL CORP-j.txt 84 | Loong/data/doc/financial/2024-Beam Global-j.txt 85 | Loong/data/doc/financial/2024-Better Choice Co Inc.-j.txt 86 | Loong/data/doc/financial/2024-Better Home & Finance Holding Co-j.txt 87 | Loong/data/doc/financial/2024-Bio Essence Corp-j.txt 88 | Loong/data/doc/financial/2024-BIOADAPTIVES, INC.-j.txt 89 | Loong/data/doc/financial/2024-BioCorRx Inc.-j.txt 90 | Loong/data/doc/financial/2024-BIOETHICS LTD-j.txt 91 | Loong/data/doc/financial/2024-BIOFORCE NANOSCIENCES HOLDINGS, INC.-j.txt 92 | Loong/data/doc/financial/2024-BIOLARGO, INC.-j.txt 93 | Loong/data/doc/financial/2024-BiomX Inc.-j.txt 94 | Loong/data/doc/financial/2024-BioNexus Gene Lab Corp-j.txt 95 | Loong/data/doc/financial/2024-BIORA THERAPEUTICS, INC.-j.txt 96 | Loong/data/doc/financial/2024-BioSig Technologies, Inc.-j.txt 97 | Loong/data/doc/financial/2024-Biostax Corp.-j.txt 98 | Loong/data/doc/financial/2024-Bitech Technologies Corp-j.txt 99 | Loong/data/doc/financial/2024-BLACKBOXSTOCKS INC.-j.txt 100 | Loong/data/doc/financial/2024-BLUE DOLPHIN ENERGY CO-j.txt 101 | Loong/data/doc/financial/2024-BLUE DOLPHIN ENERGY CO-n.txt 102 | Loong/data/doc/financial/2024-Blue Line Protection Group, Inc.-j.txt 103 | Loong/data/doc/financial/2024-Blue Star Foods Corp.-j.txt 104 | Loong/data/doc/financial/2024-BM Technologies, Inc.-j.txt 105 | Loong/data/doc/financial/2024-Boxabl Inc.-j.txt 106 | Loong/data/doc/financial/2024-BRAINSTORM CELL THERAPEUTICS INC.-j.txt 107 | Loong/data/doc/financial/2024-BRAINSTORM CELL THERAPEUTICS INC.-n.txt 108 | Loong/data/doc/financial/2024-Brand Engagement Network Inc.-j.txt 109 | Loong/data/doc/financial/2024-Breeze Holdings Acquisition Corp.-j.txt 110 | Loong/data/doc/financial/2024-BrewBilt Brewing Co-j.txt 111 | 
Loong/data/doc/financial/2024-Bright Mountain Media, Inc.-j.txt 112 | Loong/data/doc/financial/2024-Broad Street Realty, Inc.-j.txt 113 | Loong/data/doc/financial/2024-BROADWAY FINANCIAL CORP /DE/-j.txt 114 | Loong/data/doc/financial/2024-BT Brands, Inc.-j.txt 115 | Loong/data/doc/financial/2024-BTC Digital Ltd.-j.txt 116 | Loong/data/doc/financial/2024-BurgerFi International, Inc.-j.txt 117 | Loong/data/doc/financial/2024-C & C TOURS, INC.-j.txt 118 | Loong/data/doc/financial/2024-C-Bond Systems, Inc-j.txt 119 | Loong/data/doc/financial/2024-CalEthos, Inc.-j.txt 120 | Loong/data/doc/financial/2024-CaliberCos Inc.-j.txt 121 | Loong/data/doc/financial/2024-Can B Corp-j.txt 122 | Loong/data/doc/financial/2024-CANCER CAPITAL CORP-j.txt 123 | Loong/data/doc/financial/2024-Cano Health, Inc.-j.txt 124 | Loong/data/doc/financial/2024-Canoo Inc.-j.txt 125 | Loong/data/doc/financial/2024-Carisma Therapeutics Inc.-j.txt 126 | Loong/data/doc/financial/2024-Catheter Precision, Inc.-j.txt 127 | Loong/data/doc/financial/2024-Cepton, Inc.-j.txt 128 | Loong/data/doc/financial/2024-CFN Enterprises Inc.-j.txt 129 | Loong/data/doc/financial/2024-Charlie's Holdings, Inc.-j.txt 130 | Loong/data/doc/financial/2024-CHASE PACKAGING CORP-j.txt 131 | Loong/data/doc/financial/2024-CHESAPEAKE GRANITE WASH TRUST-j.txt 132 | Loong/data/doc/financial/2024-Chicken Soup for the Soul Entertainment, Inc.-j.txt 133 | Loong/data/doc/financial/2024-China Foods Holdings Ltd.-j.txt 134 | Loong/data/doc/financial/2024-CHINA PHARMA HOLDINGS, INC.-j.txt 135 | Loong/data/doc/financial/2024-Chosen, Inc.-j.txt 136 | Loong/data/doc/financial/2024-Churchill Capital Corp VII-j.txt 137 | Loong/data/doc/financial/2024-Cidara Therapeutics, Inc.-j.txt 138 | Loong/data/doc/financial/2024-CIM Opportunity Zone Fund, L.P.-j.txt 139 | Loong/data/doc/financial/2024-CIRTRAN CORP-j.txt 140 | Loong/data/doc/financial/2024-CISO Global, Inc.-j.txt 141 | Loong/data/doc/financial/2024-Clean Energy Technologies, Inc.-j.txt 142 | 
Loong/data/doc/financial/2024-Clean Vision Corp-j.txt 143 | Loong/data/doc/financial/2024-CLEARONE INC-j.txt 144 | Loong/data/doc/financial/2024-ClearSign Technologies Corp-j.txt 145 | Loong/data/doc/financial/2024-CNS Pharmaceuticals, Inc.-j.txt 146 | Loong/data/doc/financial/2024-Complete Solaria, Inc.-j.txt 147 | Loong/data/doc/financial/2024-Corner Growth Acquisition Corp.-j.txt 148 | Loong/data/doc/financial/2024-Correlate Energy Corp.-j.txt 149 | Loong/data/doc/financial/2024-Cosmos Group Holdings Inc.-j.txt 150 | Loong/data/doc/financial/2024-Coyni, Inc.-j.txt 151 | Loong/data/doc/financial/2024-CPI AEROSTRUCTURES INC-j.txt 152 | Loong/data/doc/financial/2024-CQENS Technologies Inc.-j.txt 153 | Loong/data/doc/financial/2024-CROSS TIMBERS ROYALTY TRUST-j.txt 154 | Loong/data/doc/financial/2024-Crown Electrokinetics Corp.-j.txt 155 | Loong/data/doc/financial/2024-Crypto Co-j.txt 156 | Loong/data/doc/financial/2024-Cuentas Inc.-j.txt 157 | Loong/data/doc/financial/2024-CUTERA INC-j.txt 158 | Loong/data/doc/financial/2024-Data Call Technologies-j.txt 159 | Loong/data/doc/financial/2024-Data443 Risk Mitigation, Inc.-j.txt 160 | Loong/data/doc/financial/2024-DAWSON GEOPHYSICAL CO-j.txt 161 | Loong/data/doc/financial/2024-DecisionPoint Systems, Inc.-j.txt 162 | Loong/data/doc/financial/2024-DIGITAL ALLY, INC.-j.txt 163 | Loong/data/doc/financial/2024-Digital Brands Group, Inc.-j.txt 164 | Loong/data/doc/financial/2024-Digital Media Solutions, Inc.-j.txt 165 | Loong/data/doc/financial/2024-DISH DBS CORP-j.txt 166 | Loong/data/doc/financial/2024-DIVALL INSURED INCOME PROPERTIES 2 LIMITED PARTNERSHIP-j.txt 167 | Loong/data/doc/financial/2024-Dolphin Entertainment, Inc.-j.txt 168 | Loong/data/doc/financial/2024-Doma Holdings, Inc.-j.txt 169 | Loong/data/doc/financial/2024-Dominari Holdings Inc.-j.txt 170 | Loong/data/doc/financial/2024-Dror Ortho-Design, Inc.-j.txt 171 | Loong/data/doc/financial/2024-DUOS TECHNOLOGIES GROUP, INC.-j.txt 172 | 
Loong/data/doc/financial/2024-Dynamic Shares Trust-j.txt 173 | Loong/data/doc/financial/2024-DYNARESOURCE INC-j.txt 174 | Loong/data/doc/financial/2024-ea0202445-10k_american.txt 175 | Loong/data/doc/financial/2024-Edgemode, Inc.-j.txt 176 | Loong/data/doc/financial/2024-Edible Garden AG Inc-j.txt 177 | Loong/data/doc/financial/2024-Eiger BioPharmaceuticals, Inc.-n.txt 178 | Loong/data/doc/financial/2024-Elvictor Group, Inc.-j.txt 179 | Loong/data/doc/financial/2024-EOS INC.-j.txt 180 | Loong/data/doc/financial/2024-EQUUS TOTAL RETURN, INC.-j.txt 181 | Loong/data/doc/financial/2024-ExchangeRight Income Fund-j.txt 182 | Loong/data/doc/financial/2024-Exela Technologies, Inc.-j.txt 183 | Loong/data/doc/financial/2024-Ezagoo Ltd-j.txt 184 | Loong/data/doc/financial/2024-EzFill Holdings Inc-j.txt 185 | Loong/data/doc/financial/2024-Fathom Digital Manufacturing Corp-j.txt 186 | Loong/data/doc/financial/2024-FLEXIBLE SOLUTIONS INTERNATIONAL INC-j.txt 187 | Loong/data/doc/financial/2024-FlexShopper, Inc.-j.txt 188 | Loong/data/doc/financial/2024-Fluent, Inc.-j.txt 189 | Loong/data/doc/financial/2024-FOCUS UNIVERSAL INC.-j.txt 190 | Loong/data/doc/financial/2024-FORGE INNOVATION DEVELOPMENT CORP.-j.txt 191 | Loong/data/doc/financial/2024-form10-k.txt 192 | Loong/data/doc/financial/2024-Fortune Valley Treasures, Inc.-j.txt 193 | Loong/data/doc/financial/2024-FRACTYL HEALTH, INC.-j.txt 194 | Loong/data/doc/financial/2024-Free Flow, Inc.-j.txt 195 | Loong/data/doc/financial/2024-Future FinTech Group Inc.-j.txt 196 | Loong/data/doc/financial/2024-GameSquare Holdings, Inc.-j.txt 197 | Loong/data/doc/financial/2024-Gaucho Group Holdings, Inc.-j.txt 198 | Loong/data/doc/financial/2024-GBT Technologies Inc.-j.txt 199 | Loong/data/doc/financial/2024-GD Culture Group Ltd-j.txt 200 | Loong/data/doc/financial/2024-General Enterprise Ventures, Inc.-j.txt 201 | Loong/data/doc/financial/2024-GENERATION INCOME PROPERTIES, INC.-j.txt 202 | Loong/data/doc/financial/2024-Generations Bancorp 
NY, Inc.-j.txt 203 | Loong/data/doc/financial/2024-Genprex, Inc.-j.txt 204 | Loong/data/doc/financial/2024-GIVEMEPOWER CORP-j.txt 205 | Loong/data/doc/financial/2024-Global Clean Energy Holdings, Inc.-j.txt 206 | Loong/data/doc/financial/2024-Global Gas Corp-j.txt 207 | Loong/data/doc/financial/2024-Goal Acquisitions Corp.-j.txt 208 | Loong/data/doc/financial/2024-Gofba, Inc.-j.txt 209 | Loong/data/doc/financial/2024-GOLDENWELL BIOTECH, INC.-j.txt 210 | Loong/data/doc/financial/2024-Goodness Growth Holdings, Inc.-j.txt 211 | Loong/data/doc/financial/2024-GRANT PARK FUTURES FUND LIMITED PARTNERSHIP-j.txt 212 | Loong/data/doc/financial/2024-GRANT PARK FUTURES FUND LIMITED PARTNERSHIP-n.txt 213 | Loong/data/doc/financial/2024-Greater Cannabis Company, Inc.-j.txt 214 | Loong/data/doc/financial/2024-Greenbrook TMS Inc.-j.txt 215 | Loong/data/doc/financial/2024-Greenland Technologies Holding Corp.-j.txt 216 | Loong/data/doc/financial/2024-Greenwave Technology Solutions, Inc.-j.txt 217 | Loong/data/doc/financial/2024-Greenwich LifeSciences, Inc.-j.txt 218 | Loong/data/doc/financial/2024-GRESHAM WORLDWIDE, INC.-j.txt 219 | Loong/data/doc/financial/2024-GRIID Infrastructure Inc.-j.txt 220 | Loong/data/doc/financial/2024-Grom Social Enterprises, Inc.-j.txt 221 | Loong/data/doc/financial/2024-Gryphon Digital Mining, Inc.-j.txt 222 | Loong/data/doc/financial/2024-GSE SYSTEMS INC-j.txt 223 | Loong/data/doc/financial/2024-Hapi Metaverse Inc.-j.txt 224 | Loong/data/doc/financial/2024-HARTE HANKS INC-j.txt 225 | Loong/data/doc/financial/2024-HCW Biologics Inc.-j.txt 226 | Loong/data/doc/financial/2024-HealthLynked Corp-j.txt 227 | Loong/data/doc/financial/2024-HIGH WIRE NETWORKS, INC.-j.txt 228 | Loong/data/doc/financial/2024-HOUSTON AMERICAN ENERGY CORP-j.txt 229 | Loong/data/doc/financial/2024-HUGOTON ROYALTY TRUST-j.txt 230 | Loong/data/doc/financial/2024-iLearningEngines, Inc.-j.txt 231 | Loong/data/doc/financial/2024-ISUN, INC.-n.txt 232 | 
Loong/data/doc/financial/2024-Lovesac Co-n.txt 233 | Loong/data/doc/financial/2024-Nano Magic Inc.-n.txt 234 | Loong/data/doc/financial/2024-Quarta-Rad, Inc.-n.txt 235 | Loong/data/doc/financial/2024-Sequoia Mortgage Trust 2013-2-n.txt 236 | Loong/data/doc/financial/report_000001-2024-平安银行-2024年第一季度报告.txt 237 | Loong/data/doc/financial/report_000026-2024-飞亚达-2024年一季度报告.txt 238 | Loong/data/doc/financial/report_000049-2024-德赛电池-2024年一季度报告.txt 239 | Loong/data/doc/financial/report_000409-2024-云鼎科技-2024年一季度报告.txt 240 | Loong/data/doc/financial/report_000423-2015-东阿阿胶-2015年第三季度报告全文.txt 241 | Loong/data/doc/financial/report_000423-2015-东阿阿胶-2015年第一季度报告全文.txt 242 | Loong/data/doc/financial/report_000423-2016-东阿阿胶-2016年第三季度报告全文.txt 243 | Loong/data/doc/financial/report_000423-2016-东阿阿胶-2016年第一季度报告全文.txt 244 | Loong/data/doc/financial/report_000423-2017-东阿阿胶-2017年第三季度报告全文.txt 245 | Loong/data/doc/financial/report_000423-2017-东阿阿胶-2017年第一季度报告全文.txt 246 | Loong/data/doc/financial/report_000423-2018-东阿阿胶-2018年第三季度报告全文.txt 247 | Loong/data/doc/financial/report_000423-2019-东阿阿胶-2019年第一季度报告全文.txt 248 | Loong/data/doc/financial/report_000423-2020-东阿阿胶-2020年第一季度报告全文.txt 249 | Loong/data/doc/financial/report_000423-2021-东阿阿胶-2021年第一季度报告全文.txt 250 | Loong/data/doc/financial/report_000423-2022-东阿阿胶-2022年一季度报告.txt 251 | Loong/data/doc/financial/report_000423-2023-东阿阿胶-2023年一季度报告.txt 252 | Loong/data/doc/financial/report_000423-2024-东阿阿胶-2024年一季度报告.txt 253 | Loong/data/doc/financial/report_000488-2024-晨鸣纸业-2024年一季度报告.txt 254 | Loong/data/doc/financial/report_000498-2024-山东路桥-2024年一季度报告.txt 255 | Loong/data/doc/financial/report_000536-2024-华映科技-2024年一季度报告.txt 256 | Loong/data/doc/financial/report_000554-2024-泰山石油-2024年一季度报告.txt 257 | Loong/data/doc/financial/report_000567-2024-海德股份-2024年一季度报告.txt 258 | Loong/data/doc/financial/report_000607-2024-华媒控股-2024年一季度报告.txt 259 | Loong/data/doc/financial/report_000631-2024-顺发恒业-2024年一季度报告.txt 260 | 
Loong/data/doc/financial/report_000635-2024-英力特-2024年一季度报告.txt 261 | Loong/data/doc/financial/report_000636-2024-风华高科-2024年一季度报告.txt 262 | Loong/data/doc/financial/report_000651-2015-格力电器-2015年第三季度报告全文.txt 263 | Loong/data/doc/financial/report_000651-2016-格力电器-2016年第三季度报告全文.txt 264 | Loong/data/doc/financial/report_000651-2017-格力电器-2017年第三季度报告全文.txt 265 | Loong/data/doc/financial/report_000651-2017-格力电器-2017年第一季度报告全文.txt 266 | Loong/data/doc/financial/report_000651-2018-格力电器-2018年第三季度报告全文.txt 267 | Loong/data/doc/financial/report_000651-2019-格力电器-2019年第一季度报告全文.txt 268 | Loong/data/doc/financial/report_000651-2020-格力电器-2020年第一季度报告全文.txt 269 | Loong/data/doc/financial/report_000651-2021-格力电器-2021年第一季度报告全文.txt 270 | Loong/data/doc/financial/report_000651-2022-格力电器-2022年一季度报告.txt 271 | Loong/data/doc/financial/report_000651-2023-格力电器-2023年一季度报告.txt 272 | Loong/data/doc/financial/report_000651-2024-格力电器-2024年一季度报告.txt 273 | Loong/data/doc/financial/report_000702-2024-正虹科技-2024年一季度报告.txt 274 | Loong/data/doc/financial/report_000819-2024-岳阳兴长-2024年一季度报告.txt 275 | Loong/data/doc/financial/report_000848-2024-承德露露-2024年一季度报告.txt 276 | Loong/data/doc/financial/report_000863-2024-三湘印象-2024年一季度报告.txt 277 | Loong/data/doc/financial/report_000888-2024-峨眉山A-2024年一季度报告.txt 278 | Loong/data/doc/financial/report_000906-2024-浙商中拓-2024年一季度报告.txt 279 | Loong/data/doc/financial/report_000921-2024-海信家电-2024年一季度报告.txt 280 | Loong/data/doc/financial/report_000933-2024-神火股份-2024年一季度报告.txt 281 | Loong/data/doc/financial/report_000959-2024-首钢股份-2024年一季度报告.txt 282 | Loong/data/doc/financial/report_000972-2024-中基健康-2024年一季度报告.txt 283 | Loong/data/doc/financial/report_000989-2024-九芝堂-2024年一季度报告.txt 284 | Loong/data/doc/financial/report_001203-2024-大中矿业-2024年一季度报告.txt 285 | Loong/data/doc/financial/report_001207-2024-联科科技-2024年一季度报告.txt 286 | Loong/data/doc/financial/report_001223-2024-欧克科技-2024年一季度报告.txt 287 | Loong/data/doc/financial/report_001306-2024-夏厦精密-2024年一季度报告.txt 288 | 
Loong/data/doc/financial/report_001322-2024-箭牌家居-2024年一季度报告.txt 289 | Loong/data/doc/financial/report_001360-2024-南矿集团-2024年一季度报告.txt 290 | Loong/data/doc/financial/report_001368-2024-通达创智-2024年一季度报告.txt 291 | Loong/data/doc/financial/report_001378-2024-德冠新材-2024年一季度报告.txt 292 | Loong/data/doc/financial/report_001387-2024-雪祺电气-2024年一季度报告.txt 293 | Loong/data/doc/financial/report_002011-2024-盾安环境-2024年一季度报告.txt 294 | Loong/data/doc/financial/report_002019-2024-亿帆医药-2024年一季度报告.txt 295 | Loong/data/doc/financial/report_002020-2024-京新药业-2024年一季度报告.txt 296 | Loong/data/doc/financial/report_002023-2024-海特高新-2024年一季度报告.txt 297 | Loong/data/doc/financial/report_002026-2024-山东威达-2024年一季度报告.txt 298 | Loong/data/doc/financial/report_002033-2024-丽江股份-2024年一季度报告.txt 299 | Loong/data/doc/financial/report_002065-2024-东华软件-2024年一季度报告.txt 300 | Loong/data/doc/financial/report_002090-2024-金智科技-2024年一季度报告.txt 301 | Loong/data/doc/financial/report_002093-2024-国脉科技-2024年一季度报告.txt 302 | Loong/data/doc/financial/report_002138-2024-顺络电子-2024年一季度报告.txt 303 | Loong/data/doc/financial/report_002167-2024-东方锆业-2024年一季度报告.txt 304 | Loong/data/doc/financial/report_002186-2024-全聚德-2024年一季度报告.txt 305 | Loong/data/doc/financial/report_002194-2024-武汉凡谷-2024年一季度报告.txt 306 | Loong/data/doc/financial/report_002230-2024-科大讯飞-2024年一季度报告.txt 307 | Loong/data/doc/financial/report_002242-2024-九阳股份-2024年一季度报告.txt 308 | Loong/data/doc/financial/report_002245-2024-蔚蓝锂芯-2024年一季度报告.txt 309 | Loong/data/doc/financial/report_002283-2024-天润工业-2024年一季度报告.txt 310 | Loong/data/doc/financial/report_002285-2024-世联行-2024年一季度报告.txt 311 | Loong/data/doc/financial/report_002304-2015-洋河股份-2015年第三季度报告全文.txt 312 | Loong/data/doc/financial/report_002304-2015-洋河股份-2015年第一季度报告全文.txt 313 | Loong/data/doc/financial/report_002304-2016-洋河股份-2016年第三季度报告全文.txt 314 | Loong/data/doc/financial/report_002304-2017-洋河股份-2017年第三季度报告全文.txt 315 | Loong/data/doc/financial/report_002304-2017-洋河股份-2017年第一季度报告全文.txt 316 | 
Loong/data/doc/financial/report_002304-2018-洋河股份-2018年第三季度报告全文.txt 317 | Loong/data/doc/financial/report_002304-2018-洋河股份-2018年第一季度报告全文.txt 318 | Loong/data/doc/financial/report_002304-2019-洋河股份-2019年第一季度报告全文.txt 319 | Loong/data/doc/financial/report_002304-2020-洋河股份-2020年第一季度报告全文.txt 320 | Loong/data/doc/financial/report_002304-2021-洋河股份-2021年第一季度报告全文.txt 321 | Loong/data/doc/financial/report_002304-2022-洋河股份-2022年一季度报告.txt 322 | Loong/data/doc/financial/report_002304-2023-洋河股份-2023年一季度报告.txt 323 | Loong/data/doc/financial/report_002304-2024-洋河股份-2024年一季度报告.txt 324 | Loong/data/doc/financial/report_002312-2024-川发龙蟒-2024年一季度报告.txt 325 | Loong/data/doc/financial/report_002317-2024-众生药业-2024年一季度报告.txt 326 | Loong/data/doc/financial/report_002384-2024-东山精密-2024年一季度报告.txt 327 | Loong/data/doc/financial/report_002391-2024-长青股份-2024年一季度报告.txt 328 | Loong/data/doc/financial/report_002415-2024-海康威视-2024年第一季度报告.txt 329 | Loong/data/doc/financial/report_002461-2024-珠江啤酒-2024年一季度报告.txt 330 | Loong/data/doc/financial/report_002463-2024-沪电股份-2024年一季度报告.txt 331 | Loong/data/doc/financial/report_002468-2024-申通快递-2024年一季度报告.txt 332 | Loong/data/doc/financial/report_002519-2024-银河电子-2024年一季度报告.txt 333 | Loong/data/doc/financial/report_002537-2024-海联金汇-2024年一季度报告.txt 334 | Loong/data/doc/financial/report_002588-2024-史丹利-2024年一季度报告.txt 335 | Loong/data/doc/financial/report_002591-2024-恒大高新-2024年一季度报告.txt 336 | Loong/data/doc/financial/report_002594-2015-比亚迪-2015年第三季度报告全文.txt 337 | Loong/data/doc/financial/report_002594-2016-比亚迪-2016年第三季度报告全文.txt 338 | Loong/data/doc/financial/report_002594-2017-比亚迪-2017年第三季度报告全文.txt 339 | Loong/data/doc/financial/report_002594-2018-比亚迪-2018年第三季度报告全文.txt 340 | Loong/data/doc/financial/report_002594-2019-比亚迪-2019年第一季度报告全文.txt 341 | Loong/data/doc/financial/report_002594-2020-比亚迪-2020年第一季度报告全文.txt 342 | Loong/data/doc/financial/report_002594-2021-比亚迪-2021年第一季度报告全文.txt 343 | Loong/data/doc/financial/report_002594-2022-比亚迪-2022年一季度报告.txt 344 | 
Loong/data/doc/financial/report_002594-2023-比亚迪-2023年一季度报告.txt 345 | Loong/data/doc/financial/report_002594-2024-比亚迪-2024年一季度报告.txt 346 | Loong/data/doc/financial/report_002606-2024-大连电瓷-2024年一季度报告.txt 347 | Loong/data/doc/financial/report_002664-2024-信质集团-2024年一季度报告.txt 348 | Loong/data/doc/financial/report_002676-2024-顺威股份-2024年一季度报告.txt 349 | Loong/data/doc/financial/report_002681-2024-奋达科技-2024年一季度报告.txt 350 | Loong/data/doc/financial/report_002683-2024-广东宏大-2024年一季度报告.txt 351 | Loong/data/doc/financial/report_002726-2024-龙大美食-2024年一季度报告.txt 352 | Loong/data/doc/financial/report_002762-2024-金发拉比-2024年一季度报告.txt 353 | Loong/data/doc/financial/report_002767-2024-先锋电子-2024年一季度报告.txt 354 | Loong/data/doc/financial/report_002773-2024-康弘药业-2024年一季度报告.txt 355 | Loong/data/doc/financial/report_002791-2024-坚朗五金-2024年一季度报告.txt 356 | Loong/data/doc/financial/report_002812-2024-恩捷股份-2024年一季度报告.txt 357 | Loong/data/doc/financial/report_002843-2024-泰嘉股份-2023年第一季度报告修订版.txt 358 | Loong/data/doc/financial/report_002859-2024-洁美科技-2024年一季度报告.txt 359 | Loong/data/doc/financial/report_002881-2024-美格智能-2024年一季度报告.txt 360 | Loong/data/doc/financial/report_002891-2024-中宠股份-2024年一季度报告.txt 361 | Loong/data/doc/financial/report_002928-2024-华夏航空-2024年一季度报告.txt 362 | Loong/data/doc/financial/report_002938-2024-鹏鼎控股-2024年一季度报告.txt 363 | Loong/data/doc/financial/report_002970-2024-锐明技术-2024年一季度报告.txt 364 | Loong/data/doc/financial/report_002996-2024-顺博合金-2024年一季度报告.txt 365 | Loong/data/doc/financial/report_002997-2024-瑞鹄模具-2024年一季度报告.txt 366 | Loong/data/doc/financial/report_003006-2024-百亚股份-2024年一季度报告.txt 367 | Loong/data/doc/financial/report_003816-2024-中国广核-2024年第一季度报告.txt 368 | Loong/data/doc/financial/report_300004-2024-南风股份-2024年一季度报告.txt 369 | Loong/data/doc/financial/report_300014-2024-亿纬锂能-2024年一季度报告.txt 370 | Loong/data/doc/financial/report_300016-2024-北陆药业-2024年一季度报告.txt 371 | Loong/data/doc/financial/report_300069-2024-金利华电-2024年一季度报告.txt 372 | 
Loong/data/doc/financial/report_300070-2024-碧水源-2024年一季度报告.txt 373 | Loong/data/doc/financial/report_300072-2024-海新能科-2024年一季度报告.txt 374 | Loong/data/doc/financial/report_300079-2024-数码视讯-2024年一季度报告.txt 375 | Loong/data/doc/financial/report_300091-2024-金通灵-2024年一季度报告.txt 376 | Loong/data/doc/financial/report_300092-2024-科新机电-2024年一季度报告.txt 377 | Loong/data/doc/financial/report_300115-2024-长盈精密-2024年一季度报告.txt 378 | Loong/data/doc/financial/report_300126-2024-锐奇股份-2024年一季度报告.txt 379 | Loong/data/doc/financial/report_300133-2024-华策影视-2024年一季度报告.txt 380 | Loong/data/doc/financial/report_300134-2024-大富科技-2024年一季度报告.txt 381 | Loong/data/doc/financial/report_300136-2024-信维通信-2024年一季度报告.txt 382 | Loong/data/doc/financial/report_300140-2024-节能环境-2024年一季度报告.txt 383 | Loong/data/doc/financial/report_300142-2024-沃森生物-2024年一季度报告.txt 384 | Loong/data/doc/financial/report_300146-2015-汤臣倍健-2015年第三季度报告全文.txt 385 | Loong/data/doc/financial/report_300146-2016-汤臣倍健-2016年第三季度报告全文.txt 386 | Loong/data/doc/financial/report_300146-2017-汤臣倍健-2017年第三季度报告全文.txt 387 | Loong/data/doc/financial/report_300146-2019-汤臣倍健-2019年第一季度报告全文.txt 388 | Loong/data/doc/financial/report_300146-2021-汤臣倍健-2021年第一季度报告全文.txt 389 | Loong/data/doc/financial/report_300146-2022-汤臣倍健-2022年一季度报告.txt 390 | Loong/data/doc/financial/report_300146-2023-汤臣倍健-2023年一季度报告.txt 391 | Loong/data/doc/financial/report_300146-2024-汤臣倍健-2024年一季度报告.txt 392 | Loong/data/doc/financial/report_300160-2024-秀强股份-2024年一季度报告.txt 393 | Loong/data/doc/financial/report_300162-2024-雷曼光电-2024年一季度报告.txt 394 | Loong/data/doc/financial/report_300174-2024-元力股份-2024年一季度报告.txt 395 | Loong/data/doc/financial/report_300179-2024-四方达-2024年一季度报告.txt 396 | Loong/data/doc/financial/report_300180-2024-华峰超纤-2024年一季度报告.txt 397 | Loong/data/doc/financial/report_300212-2024-易华录-2024年一季度报告.txt 398 | Loong/data/doc/financial/report_300221-2024-银禧科技-2024年一季度报告.txt 399 | Loong/data/doc/financial/report_300223-2024-北京君正-2024年一季度报告.txt 400 | 
Loong/data/doc/financial/report_300232-2024-洲明科技-2024年一季度报告.txt 401 | Loong/data/doc/financial/report_300239-2024-东宝生物-2024年一季度报告.txt 402 | Loong/data/doc/financial/report_300249-2024-依米康-2024年一季度报告.txt 403 | Loong/data/doc/financial/report_300263-2024-隆华科技-2024年一季度报告.txt 404 | Loong/data/doc/financial/report_300275-2024-梅安森-2024年一季度报告.txt 405 | Loong/data/doc/financial/report_300278-2024-华昌达-2024年一季度报告.txt 406 | Loong/data/doc/financial/report_300284-2024-苏交科-2024年一季度报告.txt 407 | Loong/data/doc/financial/report_300292-2024-吴通控股-2024年一季度报告.txt 408 | Loong/data/doc/financial/report_300299-2024-富春股份-2024年一季度报告.txt 409 | Loong/data/doc/financial/report_300300-2024-海峡创新-2024年一季度报告.txt 410 | Loong/data/doc/financial/report_300314-2024-戴维医疗-2024年一季度报告.txt 411 | Loong/data/doc/financial/report_300321-2024-同大股份-2024年一季度报告.txt 412 | Loong/data/doc/financial/report_300337-2024-银邦股份-2024年一季度报告.txt 413 | Loong/data/doc/financial/report_300349-2024-金卡智能-2024年一季度报告.txt 414 | Loong/data/doc/financial/report_300354-2024-东华测试-2024年一季度报告.txt 415 | Loong/data/doc/financial/report_300358-2024-楚天科技-2024年一季度报告.txt 416 | Loong/data/doc/financial/report_300375-2024-鹏翎股份-2024年一季度报告.txt 417 | Loong/data/doc/financial/report_300390-2024-天华新能-2024年一季度报告.txt 418 | Loong/data/doc/financial/report_300393-2024-中来股份-2024年一季度报告.txt 419 | Loong/data/doc/financial/report_300394-2024-天孚通信-2024年一季度报告.txt 420 | Loong/data/doc/financial/report_300396-2024-迪瑞医疗-2024年一季度报告.txt 421 | Loong/data/doc/financial/report_300404-2024-博济医药-2024年一季度报告.txt 422 | Loong/data/doc/financial/report_300413-2024-芒果超媒-2024年一季度报告.txt 423 | Loong/data/doc/financial/report_300414-2024-中光防雷-2024年一季度报告.txt 424 | Loong/data/doc/financial/report_300439-2024-美康生物-2024年一季度报告.txt 425 | Loong/data/doc/financial/report_300446-2024-航天智造-2024年一季度报告.txt 426 | Loong/data/doc/financial/report_300462-2024-华铭智能-2024年一季度报告.txt 427 | Loong/data/doc/financial/report_300463-2024-迈克生物-2024年一季度报告.txt 428 | 
Loong/data/doc/financial/report_300467-2024-迅游科技-2024年一季度报告.txt 429 | Loong/data/doc/financial/report_300475-2024-香农芯创-2024年一季度报告.txt 430 | Loong/data/doc/financial/report_300478-2024-杭州高新-2024年一季度报告.txt 431 | Loong/data/doc/financial/report_300487-2024-蓝晓科技-2024年一季度报告.txt 432 | Loong/data/doc/financial/report_300496-2024-中科创达-2024年一季度报告.txt 433 | Loong/data/doc/financial/report_300502-2024-新易盛-2024年一季度报告.txt 434 | Loong/data/doc/financial/report_300525-2024-博思软件-2024年一季度报告.txt 435 | Loong/data/doc/financial/report_300540-2024-蜀道装备-2024年一季度报告.txt 436 | Loong/data/doc/financial/report_300541-2024-先进数通-2024年一季度报告.txt 437 | Loong/data/doc/financial/report_300550-2024-和仁科技-2024年一季度报告.txt 438 | Loong/data/doc/financial/report_300580-2024-贝斯特-2024年一季度报告.txt 439 | Loong/data/doc/financial/report_300581-2024-晨曦航空-2024年一季度报告.txt 440 | Loong/data/doc/financial/report_300584-2024-海辰药业-2024年一季度报告.txt 441 | Loong/data/doc/financial/report_300589-2024-江龙船艇-2024年一季度报告.txt 442 | Loong/data/doc/financial/report_300590-2024-移为通信-2024年一季度报告.txt 443 | Loong/data/doc/financial/report_300592-2024-华凯易佰-2024年一季度报告.txt 444 | Loong/data/doc/financial/report_300607-2024-拓斯达-2024年一季度报告.txt 445 | Loong/data/doc/financial/report_300608-2024-思特奇-2024年一季度报告.txt 446 | Loong/data/doc/financial/report_300617-2024-安靠智电-2024年一季度报告.txt 447 | Loong/data/doc/financial/report_300621-2024-维业股份-2024年一季度报告.txt 448 | Loong/data/doc/financial/report_300644-2024-南京聚隆-2024年一季度报告.txt 449 | Loong/data/doc/financial/report_300672-2024-国科微-2024年一季度报告.txt 450 | Loong/data/doc/financial/report_300673-2024-佩蒂股份-2024年一季度报告.txt 451 | Loong/data/doc/financial/report_300678-2024-中科信息-2024年一季度报告.txt 452 | Loong/data/doc/financial/report_300679-2024-电连技术-2024年一季度报告.txt 453 | Loong/data/doc/financial/report_300683-2024-海特生物-2024年一季度报告.txt 454 | Loong/data/doc/financial/report_300710-2024-万隆光电-2024年一季度报告.txt 455 | Loong/data/doc/financial/report_300725-2024-药石科技-2024年一季度报告.txt 456 | 
Loong/data/doc/financial/report_300741-2024-华宝股份-2024年一季度报告.txt 457 | Loong/data/doc/financial/report_300743-2024-天地数码-2024年一季度报告.txt 458 | Loong/data/doc/financial/report_300749-2024-顶固集创-2024年一季度报告.txt 459 | Loong/data/doc/financial/report_300761-2024-立华股份-2024年一季度报告.txt 460 | Loong/data/doc/financial/report_300768-2024-迪普科技-2024年一季度报告.txt 461 | Loong/data/doc/financial/report_300783-2024-三只松鼠-2024年一季度报告.txt 462 | Loong/data/doc/financial/report_300800-2024-力合科技-2024年一季度报告.txt 463 | Loong/data/doc/financial/report_300801-2024-泰和科技-2024年一季度报告.txt 464 | Loong/data/doc/financial/report_300822-2024-贝仕达克-2024年一季度报告.txt 465 | Loong/data/doc/financial/report_300840-2024-酷特智能-2024年一季度报告.txt 466 | Loong/data/doc/financial/report_300848-2024-美瑞新材-2024年一季度报告.txt 467 | Loong/data/doc/financial/report_300855-2024-图南股份-2024年一季度报告.txt 468 | Loong/data/doc/financial/report_300858-2024-科拓生物-2024年一季度报告.txt 469 | Loong/data/doc/financial/report_300862-2024-蓝盾光电-2024年一季度报告.txt 470 | Loong/data/doc/financial/report_300867-2024-圣元环保-2024年一季度报告.txt 471 | Loong/data/doc/financial/report_300882-2024-万胜智能-2024年一季度报告.txt 472 | Loong/data/doc/financial/report_300887-2024-谱尼测试-2024年一季度报告.txt 473 | Loong/data/doc/financial/report_300896-2024-爱美客-2024年一季度报告.txt 474 | Loong/data/doc/financial/report_300898-2024-熊猫乳品-2024年一季度报告.txt 475 | Loong/data/doc/financial/report_300900-2024-广联航空-2024年一季度报告.txt 476 | Loong/data/doc/financial/report_300908-2024-仲景食品-2024年一季度报告.txt 477 | Loong/data/doc/financial/report_300916-2024-朗特智能-2024年一季度报告.txt 478 | Loong/data/doc/financial/report_300917-2024-特发服务-2024年一季度报告.txt 479 | Loong/data/doc/financial/report_300923-2024-研奥股份-2024年一季度报告.txt 480 | Loong/data/doc/financial/report_300939-2024-秋田微-2024年一季度报告.txt 481 | Loong/data/doc/financial/report_300978-2024-东箭科技-2024年一季度报告.txt 482 | Loong/data/doc/financial/report_300990-2024-同飞股份-2024年一季度报告.txt 483 | Loong/data/doc/financial/report_300991-2024-创益通-2024年一季度报告.txt 484 | 
Loong/data/doc/financial/report_301000-2024-肇民科技-2024年一季度报告.txt 485 | Loong/data/doc/financial/report_301003-2024-江苏博云-2024年一季度报告.txt 486 | Loong/data/doc/financial/report_301022-2024-海泰科-2024年一季度报告.txt 487 | Loong/data/doc/financial/report_301046-2024-能辉科技-2024年一季度报告.txt 488 | Loong/data/doc/financial/report_301050-2024-雷电微力-2024年一季度报告.txt 489 | Loong/data/doc/financial/report_301058-2024-中粮科工-2024年一季度报告.txt 490 | Loong/data/doc/financial/report_301070-2024-开勒股份-2024年一季度报告.txt 491 | Loong/data/doc/financial/report_301075-2024-多瑞医药-2024年一季度报告.txt 492 | Loong/data/doc/financial/report_301099-2024-雅创电子-2024年一季度报告.txt 493 | Loong/data/doc/financial/report_301117-2024-佳缘科技-2024年一季度报告.txt 494 | Loong/data/doc/financial/report_301120-2024-新特电气-2024年一季度报告.txt 495 | Loong/data/doc/financial/report_301121-2024-紫建电子-2024年一季度报告.txt 496 | Loong/data/doc/financial/report_301129-2024-瑞纳智能-2024年一季度报告.txt 497 | Loong/data/doc/financial/report_301150-2024-中一科技-2024年一季度报告.txt 498 | Loong/data/doc/financial/report_301156-2024-美农生物-2024年一季度报告.txt 499 | Loong/data/doc/financial/report_301157-2024-华塑科技-2024年一季度报告.txt 500 | Loong/data/doc/financial/report_301200-2024-大族数控-2024年一季度报告.txt 501 | Loong/data/doc/financial/report_301219-2024-腾远钴业-2024年一季度报告.txt 502 | Loong/data/doc/financial/report_301246-2024-宏源药业-2024年一季度报告.txt 503 | Loong/data/doc/financial/report_301282-2024-金禄电子-2024年一季度报告.txt 504 | Loong/data/doc/financial/report_301283-2024-聚胶股份-2024年一季度报告.txt 505 | Loong/data/doc/financial/report_301285-2024-鸿日达-2024年一季度报告.txt 506 | Loong/data/doc/financial/report_301288-2024-清研环境-2024年一季度报告.txt 507 | Loong/data/doc/financial/report_301311-2024-昆船智能-2024年一季度报告.txt 508 | Loong/data/doc/financial/report_301318-2024-维海德-2024年一季度报告.txt 509 | Loong/data/doc/financial/report_301325-2024-曼恩斯特-2024年一季度报告.txt 510 | Loong/data/doc/financial/report_301345-2024-涛涛车业-2024年第一季度报告修订版.txt 511 | Loong/data/doc/financial/report_301357-2024-北方长龙-2024年一季度报告.txt 512 | 
Loong/data/doc/financial/report_301368-2024-丰立智能-2024年一季度报告.txt 513 | Loong/data/doc/financial/report_301370-2024-国科恒泰-2024年一季度报告.txt 514 | Loong/data/doc/financial/report_301376-2024-致欧科技-2024年一季度报告.txt 515 | Loong/data/doc/financial/report_301383-2024-天键股份-2024年一季度报告.txt 516 | Loong/data/doc/financial/report_301388-2024-欣灵电气-2024年一季度报告.txt 517 | Loong/data/doc/financial/report_301391-2024-卡莱特-2024年一季度报告.txt 518 | Loong/data/doc/financial/report_301419-2024-阿莱德-2024年一季度报告.txt 519 | Loong/data/doc/financial/report_301469-2024-恒达新材-2024年一季度报告.txt 520 | Loong/data/doc/financial/report_301507-2024-民生健康-2024年一季度报告.txt 521 | Loong/data/doc/financial/report_301517-2024-陕西华达-2024年一季度报告.txt 522 | Loong/data/doc/financial/report_301550-2024-斯菱股份-2024年一季度报告.txt 523 | Loong/data/doc/financial/report_301555-2024-惠柏新材-2024年一季度报告.txt 524 | Loong/data/doc/financial/report_301589-2024-诺瓦星云-2024年一季度报告.txt 525 | Loong/data/doc/financial/report_600123-2024-兰花科创-兰花科创2024年第一季度报告.txt 526 | Loong/data/doc/financial/report_600232-2024-金鹰股份-金鹰股份2024年第一季度报告.txt 527 | Loong/data/doc/financial/report_600282-2024-南钢股份-南京钢铁股份有限公司2024年第一季度报告.txt 528 | Loong/data/doc/financial/report_600310-2024-广西能源-广西能源股份有限公司2024年第一季度报告.txt 529 | Loong/data/doc/financial/report_600396-2024-ST金山-2024年第一季度报告.txt 530 | Loong/data/doc/financial/report_600410-2024-华胜天成-2024年第一季度报告.txt 531 | Loong/data/doc/financial/report_600436-2024-片仔癀-漳州片仔癀药业股份有限公司2024年第一季度报告.txt 532 | Loong/data/doc/financial/report_600449-2024-宁夏建材-宁夏建材2024年第一季度报告.txt 533 | Loong/data/doc/financial/report_600673-2024-东阳光-东阳光2024年第一季度报告.txt 534 | Loong/data/doc/financial/report_600717-2024-天津港-天津港股份有限公司2024年第一季度报告.txt 535 | Loong/data/doc/financial/report_600740-2024-山西焦化-山西焦化股份有限公司2024年第一季度报告.txt 536 | Loong/data/doc/financial/report_600745-2024-闻泰科技-2024年第一季度报告.txt 537 | Loong/data/doc/financial/report_600746-2024-江苏索普-江苏索普2024年第一季度报告.txt 538 | Loong/data/doc/financial/report_600866-2024-星湖科技-2024年一季度报告.txt 539 | 
Loong/data/doc/financial/report_600885-2024-宏发股份-宏发股份2024年第一季度报告.txt 540 | Loong/data/doc/financial/report_600955-2024-维远股份-利华益维远化学股份有限公司2024年第一季度报告.txt 541 | Loong/data/doc/financial/report_600980-2024-北矿科技-北矿科技2024年第一季度报告.txt 542 | Loong/data/doc/financial/report_601100-2024-恒立液压-江苏恒立液压股份有限公司2024年第一季度报告.txt 543 | Loong/data/doc/financial/report_601101-2024-昊华能源-北京昊华能源股份有限公司2024年第一季度报告.txt 544 | Loong/data/doc/financial/report_601318-2024-中国平安-平安银行股份有限公司2024年第一季度报告.txt 545 | Loong/data/doc/financial/report_601633-2024-长城汽车-长城汽车股份有限公司2024年第一季度报告.txt 546 | Loong/data/doc/financial/report_601975-2024-招商南油-招商南油2024年第一季度报告.txt 547 | Loong/data/doc/financial/report_603015-2024-弘讯科技-2024年一季度报告.txt 548 | Loong/data/doc/financial/report_603059-2024-倍加洁-倍加洁集团股份有限公司2024年第一季度报告.txt 549 | Loong/data/doc/financial/report_603062-2024-麦加芯彩-公司2024年第一季度报告.txt 550 | Loong/data/doc/financial/report_603081-2024-大丰实业-浙江大丰实业股份有限公司2024年第一季度报告.txt 551 | Loong/data/doc/financial/report_603093-2024-南华期货-南华期货股份有限公司2024年第一季度报告.txt 552 | Loong/data/doc/financial/report_603097-2024-江苏华辰-江苏华辰2024年第一季度报告.txt 553 | Loong/data/doc/financial/report_603132-2024-金徽股份-金徽股份2024年第一季度报告.txt 554 | Loong/data/doc/financial/report_603156-2024-养元饮品-养元饮品2024年第一季度报告.txt 555 | Loong/data/doc/financial/report_603206-2024-嘉环科技-2024年第一季度报告.txt 556 | Loong/data/doc/financial/report_603298-2024-杭叉集团-杭叉集团2024年第一季度报告.txt 557 | Loong/data/doc/financial/report_603309-2024-维力医疗-维力医疗2024年第一季度报告.txt 558 | Loong/data/doc/financial/report_603505-2024-金石资源-金石资源集团股份有限公司2024年第一季度报告.txt 559 | Loong/data/doc/financial/report_603558-2024-健盛集团-健盛集团2024年第一季度报告.txt 560 | Loong/data/doc/financial/report_603585-2024-苏利股份-苏利股份2024年第一季度报告.txt 561 | Loong/data/doc/financial/report_603629-2024-利通电子-利通电子2024年第一季度报告.txt 562 | Loong/data/doc/financial/report_603679-2024-华体科技-四川华体照明科技股份有限公司2024年第一季度报告.txt 563 | Loong/data/doc/financial/report_603713-2024-密尔克卫-密尔克卫智能供应链服务集团股份有限公司2024年第一季度报告.txt 564 | 
Loong/data/doc/financial/report_603737-2024-三棵树-2024年第一季度报告.txt 565 | Loong/data/doc/financial/report_603799-2024-华友钴业-华友钴业2024年第一季度报告.txt 566 | Loong/data/doc/financial/report_603919-2024-金徽酒-金徽酒股份有限公司2024年第一季度报告.txt 567 | Loong/data/doc/financial/report_603920-2024-世运电路-世运电路2024年第一季度报告.txt 568 | Loong/data/doc/financial/report_603978-2024-深圳新星-2024年第一季度报告.txt 569 | Loong/data/doc/financial/report_603982-2024-泉峰汽车-2024年第一季度报告.txt 570 | Loong/data/doc/financial/report_605300-2024-佳禾食品-佳禾食品工业股份有限公司2024年一季报.txt 571 | Loong/data/doc/financial/report_605389-2024-长龄液压-江苏长龄液压股份有限公司2024年一季度报告.txt 572 | Loong/data/doc/financial/report_688084-2024-晶品特装-2024年第一季度报告.txt 573 | Loong/data/doc/financial/report_688096-2024-京源环保-2024年第一季度报告.txt 574 | Loong/data/doc/financial/report_688111-2024-金山办公-金山办公2024年第一季度报告.txt 575 | Loong/data/doc/financial/report_688143-2024-长盈通-武汉长盈通光电技术股份有限公司2024年第一季度报告.txt 576 | Loong/data/doc/financial/report_688267-2024-中触媒-中触媒新材料股份有限公司2024年第一季度报告.txt 577 | Loong/data/doc/financial/report_688278-2024-特宝生物-特宝生物2024年第一季度报告.txt 578 | Loong/data/doc/financial/report_688316-2024-青云科技-2024年第一季度报告.txt 579 | Loong/data/doc/financial/report_688408-2024-中信博-中信博2024年第一季度报告.txt 580 | Loong/data/doc/financial/report_688510-2024-航亚科技-无锡航亚科技股份有限公司2024年第一季度报告.txt 581 | Loong/data/doc/financial/report_688516-2024-奥特维-无锡奥特维科技股份有限公司2024年第一季度报告.txt 582 | Loong/data/doc/financial/report_688579-2024-山大地纬-山大地纬2023年第一季度报告修订版.txt 583 | Loong/data/doc/financial/report_688626-2024-翔宇医疗-翔宇医疗2024年第一季度报告.txt 584 | Loong/data/doc/financial/report_688628-2024-优利德-2024年第一季度报告.txt 585 | Loong/data/doc/financial/report_688630-2024-芯碁微装-2024年第一季度报告.txt 586 | Loong/data/doc/financial/report_688676-2024-金盘科技-2024年第一季度报告.txt 587 | Loong/data/doc/financial/report_688682-2024-霍莱沃-2024年第一季度报告.txt 588 | Loong/data/doc/financial/report_688722-2024-同益中-同益中2024年第一季度报告.txt 589 | Loong/data/doc/financial/report_688793-2024-倍轻松-2024年第一季度报告.txt 590 | Loong/data/doc/legal/legal.json 591 | 
Loong/data/doc/paper/1508.01745.md 592 | Loong/data/doc/paper/1604.05280.md 593 | Loong/data/doc/paper/1604.05377.md 594 | Loong/data/doc/paper/1605.00252.md 595 | Loong/data/doc/paper/1610.04325.md 596 | Loong/data/doc/paper/1612.04662.md 597 | Loong/data/doc/paper/1709.03082.md 598 | Loong/data/doc/paper/1802.03426.md 599 | Loong/data/doc/paper/1802.08129.md 600 | Loong/data/doc/paper/1803.08375.md 601 | Loong/data/doc/paper/1804.04272.md 602 | Loong/data/doc/paper/1901.02039.md 603 | Loong/data/doc/paper/1901.05353.md 604 | Loong/data/doc/paper/1902.05715.md 605 | Loong/data/doc/paper/1902.07958.md 606 | Loong/data/doc/paper/1906.00341.md 607 | Loong/data/doc/paper/1907.02189.md 608 | Loong/data/doc/paper/1908.03825.md 609 | Loong/data/doc/paper/1909.04715.md 610 | Loong/data/doc/paper/1909.11114.md 611 | Loong/data/doc/paper/1909.11942.md 612 | Loong/data/doc/paper/1910.01161.md 613 | Loong/data/doc/paper/1910.10434.md 614 | Loong/data/doc/paper/1910.10683.md 615 | Loong/data/doc/paper/1911.00837.md 616 | Loong/data/doc/paper/1911.07205.md 617 | Loong/data/doc/paper/2001.04296.md 618 | Loong/data/doc/paper/2001.09186.md 619 | Loong/data/doc/paper/2002.03754.md 620 | Loong/data/doc/paper/2003.10555.md 621 | Loong/data/doc/paper/2004.08249.md 622 | Loong/data/doc/paper/2004.15015.md 623 | Loong/data/doc/paper/2005.10084.md 624 | Loong/data/doc/paper/2005.12592.md 625 | Loong/data/doc/paper/2007.01560.md 626 | Loong/data/doc/paper/2007.06048.md 627 | Loong/data/doc/paper/2008.07772.md 628 | Loong/data/doc/paper/2009.02235.md 629 | Loong/data/doc/paper/2010.00453.md 630 | Loong/data/doc/paper/2011.09533.md 631 | Loong/data/doc/paper/2012.04857.md 632 | Loong/data/doc/paper/2101.03049.md 633 | Loong/data/doc/paper/2103.01955.md 634 | Loong/data/doc/paper/2103.05149.md 635 | Loong/data/doc/paper/2103.11955.md 636 | Loong/data/doc/paper/2104.13209.md 637 | Loong/data/doc/paper/2105.01051.md 638 | Loong/data/doc/paper/2105.14550.md 639 | 
Loong/data/doc/paper/2106.04624.md 640 | Loong/data/doc/paper/2106.07447.md 641 | Loong/data/doc/paper/2107.01384.md 642 | Loong/data/doc/paper/2107.04700.md 643 | Loong/data/doc/paper/2108.08612.md 644 | Loong/data/doc/paper/2109.00666.md 645 | Loong/data/doc/paper/2109.09193.md 646 | Loong/data/doc/paper/2109.11251.md 647 | Loong/data/doc/paper/2111.14641.md 648 | Loong/data/doc/paper/2112.04359.md 649 | Loong/data/doc/paper/2112.09761.md 650 | Loong/data/doc/paper/2112.10913.md 651 | Loong/data/doc/paper/2112.13227.md 652 | Loong/data/doc/paper/2201.00965.md 653 | Loong/data/doc/paper/2201.11990.md 654 | Loong/data/doc/paper/2203.09043.md 655 | Loong/data/doc/paper/2203.15556.md 656 | Loong/data/doc/paper/2204.03775.md 657 | Loong/data/doc/paper/2205.10383.md 658 | Loong/data/doc/paper/2205.11257.md 659 | Loong/data/doc/paper/2206.06290.md 660 | Loong/data/doc/paper/2207.06731.md 661 | Loong/data/doc/paper/2207.11536.md 662 | Loong/data/doc/paper/2208.12615.md 663 | Loong/data/doc/paper/2209.02128.md 664 | Loong/data/doc/paper/2209.08244.md 665 | Loong/data/doc/paper/2209.08648.md 666 | Loong/data/doc/paper/2209.12660.md 667 | Loong/data/doc/paper/2209.12681.md 668 | Loong/data/doc/paper/2209.13768.md 669 | Loong/data/doc/paper/2210.09953.md 670 | Loong/data/doc/paper/2210.10749.md 671 | Loong/data/doc/paper/2210.11735.md 672 | Loong/data/doc/paper/2210.14140.md 673 | Loong/data/doc/paper/2210.15097.md 674 | Loong/data/doc/paper/2210.16724.md 675 | Loong/data/doc/paper/2211.04894.md 676 | Loong/data/doc/paper/2211.05244.md 677 | Loong/data/doc/paper/2211.05617.md 678 | Loong/data/doc/paper/2211.10066.md 679 | Loong/data/doc/paper/2211.10797.md 680 | Loong/data/doc/paper/2212.04356.md 681 | Loong/data/doc/paper/2212.10001.md 682 | Loong/data/doc/paper/2212.13138.md 683 | Loong/data/doc/paper/2301.05062.md 684 | Loong/data/doc/paper/2301.08807.md 685 | Loong/data/doc/paper/2301.10813.md 686 | Loong/data/doc/paper/2301.13196.md 687 | 
Loong/data/doc/paper/2302.05442.md 688 | Loong/data/doc/paper/2302.09751.md 689 | Loong/data/doc/paper/2302.13971.md 690 | Loong/data/doc/paper/2302.14376.md 691 | Loong/data/doc/paper/2303.07678.md 692 | Loong/data/doc/paper/2303.08774.md 693 | Loong/data/doc/paper/2303.08891.md 694 | Loong/data/doc/paper/2303.12712.md 695 | Loong/data/doc/paper/2303.13375.md 696 | Loong/data/doc/paper/2303.13495.md 697 | Loong/data/doc/paper/2303.13496.md 698 | Loong/data/doc/paper/2303.13604.md 699 | Loong/data/doc/paper/2303.15056.md 700 | Loong/data/doc/paper/2303.16129.md 701 | Loong/data/doc/paper/2303.16203.md 702 | Loong/data/doc/paper/2304.03283.md 703 | Loong/data/doc/paper/2304.03442.md 704 | Loong/data/doc/paper/2304.04661.md 705 | Loong/data/doc/paper/2304.06793.md 706 | Loong/data/doc/paper/2304.08177.md 707 | Loong/data/doc/paper/2304.08782.md 708 | Loong/data/doc/paper/2304.12898.md 709 | Loong/data/doc/paper/2304.14108.md 710 | Loong/data/doc/paper/2305.00857.md 711 | Loong/data/doc/paper/2305.01865.md 712 | Loong/data/doc/paper/2305.02536.md 713 | Loong/data/doc/paper/2305.03653.md 714 | Loong/data/doc/paper/2305.06161.md 715 | Loong/data/doc/paper/2305.07004.md 716 | Loong/data/doc/paper/2305.08322.md 717 | Loong/data/doc/paper/2305.12474.md 718 | Loong/data/doc/paper/2305.13691.md 719 | Loong/data/doc/paper/2305.14334.md 720 | Loong/data/doc/paper/2305.14992.md 721 | Loong/data/doc/paper/2305.15011.md 722 | Loong/data/doc/paper/2305.15077.md 723 | Loong/data/doc/paper/2305.15347.md 724 | Loong/data/doc/paper/2305.16291.md 725 | Loong/data/doc/paper/2305.16307.md 726 | Loong/data/doc/paper/2305.16366.md 727 | Loong/data/doc/paper/2305.17560.md 728 | Loong/data/doc/paper/2305.17812.md 729 | Loong/data/doc/paper/2305.18290.md 730 | Loong/data/doc/paper/2306.02707.md 731 | Loong/data/doc/paper/2306.03078.md 732 | Loong/data/doc/paper/2306.05685.md 733 | Loong/data/doc/paper/2306.06624.md 734 | Loong/data/doc/paper/2306.07629.md 735 | 
Loong/data/doc/paper/2306.09198.md 736 | Loong/data/doc/paper/2306.11348.md 737 | Loong/data/doc/paper/2306.11417.md 738 | Loong/data/doc/paper/2306.11644.md 739 | Loong/data/doc/paper/2306.15626.md 740 | Loong/data/doc/paper/2306.16793.md 741 | Loong/data/doc/paper/2306.17653.md 742 | Loong/data/doc/paper/2307.03172.md 743 | Loong/data/doc/paper/2307.03319.md 744 | Loong/data/doc/paper/2307.03875.md 745 | Loong/data/doc/paper/2307.04684.md 746 | Loong/data/doc/paper/2307.06135.md 747 | Loong/data/doc/paper/2307.09288.md 748 | Loong/data/doc/paper/2307.09481.md 749 | Loong/data/doc/paper/2307.10719.md 750 | Loong/data/doc/paper/2307.15217.md 751 | Loong/data/doc/paper/2307.15818.md 752 | Loong/data/doc/paper/2307.16039.md 753 | Loong/data/doc/paper/2307.16645.md 754 | Loong/data/doc/paper/2307.16789.md 755 | Loong/data/doc/paper/2307.16877.md 756 | Loong/data/doc/paper/2308.00352.md 757 | Loong/data/doc/paper/2308.01861.md 758 | Loong/data/doc/paper/2308.05384.md 759 | Loong/data/doc/paper/2308.07107.md 760 | Loong/data/doc/paper/2308.08493.md 761 | Loong/data/doc/paper/2308.08998.md 762 | Loong/data/doc/paper/2308.10960.md 763 | Loong/data/doc/paper/2308.11432.md 764 | Loong/data/doc/paper/2308.12284.md 765 | Loong/data/doc/paper/2308.12950.md 766 | Loong/data/doc/paper/2308.13937.md 767 | Loong/data/doc/paper/2309.00267.md 768 | Loong/data/doc/paper/2309.01219.md 769 | Loong/data/doc/paper/2309.01431.md 770 | Loong/data/doc/paper/2309.03450.md 771 | Loong/data/doc/paper/2309.04662.md 772 | Loong/data/doc/paper/2309.05463.md 773 | Loong/data/doc/paper/2309.05868.md 774 | Loong/data/doc/paper/2309.05922.md 775 | Loong/data/doc/paper/2309.06180.md 776 | Loong/data/doc/paper/2309.07864.md 777 | Loong/data/doc/paper/2309.08532.md 778 | Loong/data/doc/paper/2309.08958.md 779 | Loong/data/doc/paper/2309.09400.md 780 | Loong/data/doc/paper/2309.10313.md 781 | Loong/data/doc/paper/2309.10400.md 782 | Loong/data/doc/paper/2309.11925.md 783 | 
Loong/data/doc/paper/2309.12307.md 784 | Loong/data/doc/paper/2309.12499.md 785 | Loong/data/doc/paper/2309.12871.md 786 | Loong/data/doc/paper/2309.15025.md 787 | Loong/data/doc/paper/2309.15088.md 788 | Loong/data/doc/paper/2309.16039.md 789 | Loong/data/doc/paper/2309.17410.md 790 | Loong/data/doc/paper/2309.17421.md 791 | Loong/data/doc/paper/2309.17425.md 792 | Loong/data/doc/paper/2309.17452.md 793 | Loong/data/doc/paper/2309.17453.md 794 | Loong/data/doc/paper/2310.00564.md 795 | Loong/data/doc/paper/2310.00935.md 796 | Loong/data/doc/paper/2310.01036.md 797 | Loong/data/doc/paper/2310.01152.md 798 | Loong/data/doc/paper/2310.01387.md 799 | Loong/data/doc/paper/2310.01410.md 800 | Loong/data/doc/paper/2310.01798.md 801 | Loong/data/doc/paper/2310.01801.md 802 | Loong/data/doc/paper/2310.02238.md 803 | Loong/data/doc/paper/2310.02304.md 804 | Loong/data/doc/paper/2310.02989.md 805 | Loong/data/doc/paper/2310.03025.md 806 | Loong/data/doc/paper/2310.03051.md 807 | Loong/data/doc/paper/2310.03744.md 808 | Loong/data/doc/paper/2310.04406.md 809 | Loong/data/doc/paper/2310.04875.md 810 | Loong/data/doc/paper/2310.05204.md 811 | Loong/data/doc/paper/2310.05492.md 812 | Loong/data/doc/paper/2310.05915.md 813 | Loong/data/doc/paper/2310.06117.md 814 | Loong/data/doc/paper/2310.06225.md 815 | Loong/data/doc/paper/2310.06770.md 816 | Loong/data/doc/paper/2310.06825.md 817 | Loong/data/doc/paper/2310.06987.md 818 | Loong/data/doc/paper/2310.07075.md 819 | Loong/data/doc/paper/2310.07177.md 820 | Loong/data/doc/paper/2310.07554.md 821 | Loong/data/doc/paper/2310.07579.md 822 | Loong/data/doc/paper/2310.08118.md 823 | Loong/data/doc/paper/2310.08256.md 824 | Loong/data/doc/paper/2310.08319.md 825 | Loong/data/doc/paper/2310.08372.md 826 | Loong/data/doc/paper/2310.08419.md 827 | Loong/data/doc/paper/2310.08461.md 828 | Loong/data/doc/paper/2310.08491.md 829 | Loong/data/doc/paper/2310.08541.md 830 | Loong/data/doc/paper/2310.08879.md 831 | 
Loong/data/doc/paper/2310.09590.md 832 | Loong/data/doc/paper/2310.10047.md 833 | Loong/data/doc/paper/2310.10158.md 834 | Loong/data/doc/paper/2310.10482.md 835 | Loong/data/doc/paper/2310.10501.md 836 | Loong/data/doc/paper/2310.10631.md 837 | Loong/data/doc/paper/2310.10634.md 838 | Loong/data/doc/paper/2310.10638.md 839 | Loong/data/doc/paper/2310.10677.md 840 | Loong/data/doc/paper/2310.11511.md 841 | Loong/data/doc/paper/2310.11689.md 842 | Loong/data/doc/paper/2310.12036.md 843 | Loong/data/doc/paper/2310.12397.md 844 | Loong/data/doc/paper/2310.12426.md 845 | Loong/data/doc/paper/2310.12487.md 846 | Loong/data/doc/paper/2310.12541.md 847 | Loong/data/doc/paper/2310.12560.md 848 | Loong/data/doc/paper/2310.12773.md 849 | Loong/data/doc/paper/2310.12823.md 850 | Loong/data/doc/paper/2310.12931.md 851 | Loong/data/doc/paper/2310.12956.md 852 | Loong/data/doc/paper/2310.13023.md 853 | Loong/data/doc/paper/2310.13227.md 854 | Loong/data/doc/paper/2310.13548.md 855 | Loong/data/doc/paper/2310.13615.md 856 | Loong/data/doc/paper/2310.13639.md 857 | Loong/data/doc/paper/2310.13988.md 858 | Loong/data/doc/paper/2310.14628.md 859 | Loong/data/doc/paper/2310.15123.md 860 | Loong/data/doc/paper/2310.15144.md 861 | Loong/data/doc/paper/2310.15916.md 862 | Loong/data/doc/paper/2310.16450.md 863 | Loong/data/doc/paper/2310.16570.md 864 | Loong/data/doc/paper/2310.16789.md 865 | Loong/data/doc/paper/2310.16795.md 866 | Loong/data/doc/paper/2310.17623.md 867 | Loong/data/doc/paper/2310.17631.md 868 | Loong/data/doc/paper/2310.17976.md 869 | Loong/data/doc/paper/2310.18465.md 870 | Loong/data/doc/paper/2310.18969.md 871 | Loong/data/doc/paper/2310.19046.md 872 | Loong/data/doc/paper/2310.19102.md 873 | Loong/data/doc/paper/2310.19784.md 874 | Loong/data/doc/paper/2310.19852.md 875 | Loong/data/doc/paper/2310.20329.md 876 | Loong/data/doc/paper/2310.20689.md 877 | Loong/data/doc/paper/2310.20707.md 878 | Loong/data/doc/paper/2311.00423.md 879 | 
Loong/data/doc/paper/2311.01192.md 880 | Loong/data/doc/paper/2311.03348.md 881 | Loong/data/doc/paper/2311.04850.md 882 | Loong/data/doc/paper/2311.04897.md 883 | Loong/data/doc/paper/2311.05232.md 884 | Loong/data/doc/paper/2311.05997.md 885 | Loong/data/doc/paper/2311.07463.md 886 | Loong/data/doc/paper/2311.07911.md 887 | Loong/data/doc/paper/2311.08045.md 888 | Loong/data/doc/paper/2311.08252.md 889 | Loong/data/doc/paper/2311.08401.md 890 | Loong/data/doc/paper/2311.08711.md 891 | Loong/data/doc/paper/2311.08803.md 892 | Loong/data/doc/paper/2311.09215.md 893 | Loong/data/doc/paper/2311.09677.md 894 | Loong/data/doc/paper/2311.10081.md 895 | Loong/data/doc/paper/2311.10702.md 896 | Loong/data/doc/paper/2311.11045.md 897 | Loong/data/doc/paper/2311.11797.md 898 | Loong/data/doc/paper/2311.11855.md 899 | Loong/data/doc/paper/2311.12420.md 900 | Loong/data/doc/paper/2311.12983.md 901 | Loong/data/doc/paper/2311.13503.md 902 | Loong/data/doc/paper/2311.13721.md 903 | Loong/data/doc/paper/2311.14115.md 904 | Loong/data/doc/paper/2311.14520.md 905 | Loong/data/doc/paper/2311.15249.md 906 | Loong/data/doc/paper/2311.15451.md 907 | Loong/data/doc/paper/2311.16079.md 908 | Loong/data/doc/paper/2311.16090.md 909 | Loong/data/doc/paper/2311.16169.md 910 | Loong/data/doc/paper/2311.16452.md 911 | Loong/data/doc/paper/2311.16502.md 912 | Loong/data/doc/paper/2311.16867.md 913 | Loong/data/doc/paper/2311.17035.md 914 | Loong/data/doc/paper/2311.17541.md 915 | Loong/data/doc/paper/2311.17946.md 916 | Loong/data/doc/paper/2311.18677.md 917 | Loong/data/doc/paper/2311.18743.md 918 | Loong/data/doc/paper/2311.18760.md 919 | Loong/data/doc/paper/2312.00164.md 920 | Loong/data/doc/paper/2312.01797.md 921 | Loong/data/doc/paper/2312.02418.md 922 | Loong/data/doc/paper/2312.04724.md 923 | Loong/data/doc/paper/2312.04985.md 924 | Loong/data/doc/paper/2312.06585.md 925 | Loong/data/doc/paper/2312.07395.md 926 | Loong/data/doc/paper/2312.07551.md 927 | 
Loong/data/doc/paper/2312.07930.md 928 | Loong/data/doc/paper/2312.08358.md 929 | Loong/data/doc/paper/2312.08914.md 930 | Loong/data/doc/paper/2312.09085.md 931 | Loong/data/doc/paper/2312.09241.md 932 | Loong/data/doc/paper/2312.09244.md 933 | Loong/data/doc/paper/2312.09390.md 934 | Loong/data/doc/paper/2312.10997.md 935 | Loong/data/doc/paper/2312.11444.md 936 | Loong/data/doc/paper/2312.12575.md 937 | Loong/data/doc/paper/2312.12683.md 938 | Loong/data/doc/paper/2312.13771.md 939 | Loong/data/doc/paper/2312.14302.md 940 | Loong/data/doc/paper/2312.15166.md 941 | Loong/data/doc/paper/2312.15791.md 942 | Loong/data/doc/paper/2312.16682.md 943 | Loong/data/doc/paper/2312.17122.md 944 | Loong/data/doc/paper/2312.17173.md 945 | Loong/data/doc/paper/2312.17235.md 946 | Loong/data/doc/paper/2312.17238.md 947 | Loong/data/doc/paper/2401.00071.md 948 | Loong/data/doc/paper/2401.00211.md 949 | Loong/data/doc/paper/2401.00368.md 950 | Loong/data/doc/paper/2401.00595.md 951 | Loong/data/doc/paper/2401.00812.md 952 | Loong/data/doc/paper/2401.01055.md 953 | Loong/data/doc/paper/2401.01141.md 954 | Loong/data/doc/paper/2401.01275.md 955 | Loong/data/doc/paper/2401.01286.md 956 | Loong/data/doc/paper/2401.01325.md 957 | Loong/data/doc/paper/2401.01335.md 958 | Loong/data/doc/paper/2401.01614.md 959 | Loong/data/doc/paper/2401.01854.md 960 | Loong/data/doc/paper/2401.01879.md 961 | Loong/data/doc/paper/2401.02015.md 962 | Loong/data/doc/paper/2401.02051.md 963 | Loong/data/doc/paper/2401.02385.md 964 | Loong/data/doc/paper/2401.02954.md 965 | Loong/data/doc/paper/2401.03462.md 966 | Loong/data/doc/paper/2401.04056.md 967 | Loong/data/doc/paper/2401.04088.md 968 | Loong/data/doc/paper/2401.04398.md 969 | Loong/data/doc/paper/2401.04620.md 970 | Loong/data/doc/paper/2401.04621.md 971 | Loong/data/doc/paper/2401.04695.md 972 | Loong/data/doc/paper/2401.05268.md 973 | Loong/data/doc/paper/2401.05302.md 974 | Loong/data/doc/paper/2401.05561.md 975 | 
Loong/data/doc/paper/2401.05654.md 976 | Loong/data/doc/paper/2401.05778.md 977 | Loong/data/doc/paper/2401.05856.md 978 | Loong/data/doc/paper/2401.06059.md 979 | Loong/data/doc/paper/2401.06066.md 980 | Loong/data/doc/paper/2401.06080.md 981 | Loong/data/doc/paper/2401.06102.md 982 | Loong/data/doc/paper/2401.06118.md 983 | Loong/data/doc/paper/2401.06121.md 984 | Loong/data/doc/paper/2401.06201.md 985 | Loong/data/doc/paper/2401.06209.md 986 | Loong/data/doc/paper/2401.06373.md 987 | Loong/data/doc/paper/2401.06468.md 988 | Loong/data/doc/paper/2401.06760.md 989 | Loong/data/doc/paper/2401.06954.md 990 | Loong/data/doc/paper/2401.07102.md 991 | Loong/data/doc/paper/2401.07184.md 992 | Loong/data/doc/paper/2401.07324.md 993 | Loong/data/doc/paper/2401.07897.md 994 | Loong/data/doc/paper/2401.08281.md 995 | Loong/data/doc/paper/2401.08358.md 996 | Loong/data/doc/paper/2401.08406.md 997 | Loong/data/doc/paper/2401.08417.md 998 | Loong/data/doc/paper/2401.08541.md 999 | Loong/data/doc/paper/2401.09395.md 1000 | Loong/data/doc/paper/2401.09670.md 1001 | Loong/data/doc/paper/2401.10020.md 1002 | Loong/data/doc/paper/2401.10034.md 1003 | Loong/data/doc/paper/2401.10302.md 1004 | Loong/data/doc/paper/2401.10774.md 1005 | Loong/data/doc/paper/2401.10935.md 1006 | Loong/data/doc/paper/2401.11181.md 1007 | Loong/data/doc/paper/2401.11708.md 1008 | Loong/data/doc/paper/2401.11817.md 1009 | Loong/data/doc/paper/2401.11888.md 1010 | Loong/data/doc/paper/2401.12187.md 1011 | Loong/data/doc/paper/2401.12963.md 1012 | Loong/data/doc/paper/2401.13601.md 1013 | Loong/data/doc/paper/2401.13649.md 1014 | Loong/data/doc/paper/2401.13919.md 1015 | Loong/data/doc/paper/2401.14887.md 1016 | Loong/data/doc/paper/2401.15360.md 1017 | Loong/data/doc/paper/2401.16185.md 1018 | Loong/data/doc/paper/2401.17882.md 1019 | Loong/data/doc/paper/2401.18018.md 1020 | Loong/data/doc/paper/2401.18079.md 1021 | Loong/data/doc/paper/2402.00157.md 1022 | Loong/data/doc/paper/2402.00159.md 1023 | 
Loong/data/doc/paper/2402.01030.md 1024 | Loong/data/doc/paper/2402.01145.md 1025 | Loong/data/doc/paper/2402.01306.md 1026 | Loong/data/doc/paper/2402.01622.md 1027 | Loong/data/doc/paper/2402.01739.md 1028 | Loong/data/doc/paper/2402.01817.md 1029 | Loong/data/doc/paper/2402.02057.md 1030 | Loong/data/doc/paper/2402.02416.md 1031 | Loong/data/doc/paper/2402.02716.md 1032 | Loong/data/doc/paper/2402.03620.md 1033 | Loong/data/doc/paper/2402.04247.md 1034 | Loong/data/doc/paper/2402.05861.md 1035 | Loong/data/doc/paper/2402.09178.md 1036 | Loong/data/doc/paper/2402.10104.md 1037 | Loong/data/doc/paper/2402.10705.md 1038 | Loong/data/doc/paper/2402.12348.md 1039 | Loong/data/doc/paper/2402.14328.md 1040 | Loong/data/doc/paper/2402.14658.md 1041 | Loong/data/doc/paper/2402.14830.md 1042 | Loong/data/doc/paper/2402.15205.md 1043 | Loong/data/doc/paper/2402.16627.md 1044 | Loong/data/doc/paper/2402.16981.md 1045 | Loong/data/doc/paper/2402.17563.md 1046 | Loong/data/doc/paper/2402.19113.md 1047 | Loong/data/doc/paper/2403.00278.md 1048 | Loong/data/doc/paper/2403.01876.md 1049 | Loong/data/doc/paper/2403.01979.md 1050 | Loong/data/doc/paper/2403.03101.md 1051 | Loong/data/doc/paper/2403.03230.md 1052 | Loong/data/doc/paper/2403.05530.md 1053 | Loong/data/doc/paper/2403.06560.md 1054 | Loong/data/doc/paper/2403.07059.md 1055 | Loong/data/doc/paper/2403.07559.md 1056 | Loong/data/doc/paper/2403.08140.md 1057 | Loong/data/doc/paper/2403.08251.md 1058 | Loong/data/doc/paper/2403.08337.md 1059 | Loong/data/doc/paper/2403.08570.md 1060 | Loong/data/doc/paper/2403.09308.md 1061 | Loong/data/doc/paper/2403.09746.md 1062 | Loong/data/doc/paper/2403.12806.md 1063 | Loong/data/doc/paper/2403.13309.md 1064 | Loong/data/doc/paper/2403.13355.md 1065 | Loong/data/doc/paper/2403.13438.md 1066 | Loong/data/doc/paper/2403.14123.md 1067 | Loong/data/doc/paper/2403.15556.md 1068 | Loong/data/doc/paper/2403.19154.md 1069 | Loong/data/doc/paper/2403.19949.md 1070 | 
Loong/data/doc/paper/2403.19962.md 1071 | Loong/data/doc/paper/2403.20208.md 1072 | Loong/data/doc/paper/2404.00376.md 1073 | Loong/data/doc/paper/2404.01349.md 1074 | Loong/data/doc/paper/2404.02152.md 1075 | Loong/data/doc/paper/2404.02575.md 1076 | Loong/data/doc/paper/2404.04292.md 1077 | Loong/data/doc/paper/2404.04850.md 1078 | Loong/data/doc/paper/2404.04925.md 1079 | Loong/data/doc/paper/2404.05264.md 1080 | Loong/data/doc/paper/2404.05443.md 1081 | Loong/data/doc/paper/2404.05955.md 1082 | Loong/data/doc/paper/2404.05971.md 1083 | Loong/data/doc/paper/2404.06654.md 1084 | Loong/data/doc/paper/2404.07084.md 1085 | Loong/data/doc/paper/2404.09356.md 1086 | Loong/data/doc/paper/2404.09486.md 1087 | Loong/data/doc/paper/2404.09699.md 1088 | Loong/data/doc/paper/2404.10150.md 1089 | Loong/data/doc/paper/2404.11269.md 1090 | Loong/data/doc/paper/2404.11276.md 1091 | Loong/data/doc/paper/2404.11912.md 1092 | Loong/data/doc/paper/2404.11932.md 1093 | Loong/data/doc/paper/2404.12135.md 1094 | Loong/data/doc/paper/2404.12318.md 1095 | Loong/data/doc/paper/2404.12736.md 1096 | Loong/data/doc/paper/2404.13066.md 1097 | Loong/data/doc/paper/2404.13591.md 1098 | Loong/data/doc/paper/2404.13886.md 1099 | Loong/data/doc/paper/2404.14082.md 1100 | Loong/data/doc/paper/2404.14122.md 1101 | Loong/data/doc/paper/2404.14215.md 1102 | Loong/data/doc/paper/2404.14387.md 1103 | Loong/data/doc/paper/2404.14688.md 1104 | Loong/data/doc/paper/2404.15247.md 1105 | Loong/data/doc/paper/2404.15378.md 1106 | Loong/data/doc/paper/2404.15381.md 1107 | Loong/data/doc/paper/2404.15488.md 1108 | Loong/data/doc/paper/2404.15676.md 1109 | Loong/data/doc/paper/2404.15939.md 1110 | Loong/data/doc/paper/2404.16375.md 1111 | Loong/data/doc/paper/2404.16792.md 1112 | Loong/data/doc/paper/2404.16906.md 1113 | Loong/data/doc/paper/2404.17153.md 1114 | Loong/data/doc/paper/2404.17833.md 1115 | Loong/data/doc/paper/2404.18231.md 1116 | Loong/data/doc/paper/2404.18239.md 1117 | 
Loong/data/doc/paper/2404.18311.md 1118 | Loong/data/doc/paper/2404.18353.md 1119 | Loong/data/doc/paper/2404.18824.md 1120 | Loong/data/doc/paper/2404.18852.md 1121 | Loong/data/doc/paper/2404.18911.md 1122 | Loong/data/doc/paper/2404.19505.md 1123 | Loong/data/doc/paper/2404.19553.md 1124 | Loong/data/doc/paper/2404.19705.md 1125 | Loong/data/doc/paper/2405.00263.md 1126 | Loong/data/doc/paper/2405.01029.md 1127 | Loong/data/doc/paper/2405.01147.md 1128 | Loong/data/doc/paper/2405.01378.md 1129 | Loong/data/doc/paper/2405.01868.md 1130 | Loong/data/doc/paper/2405.02019.md 1131 | Loong/data/doc/paper/2405.02421.md 1132 | Loong/data/doc/paper/2405.02714.md 1133 | Loong/data/doc/paper/2405.03007.md 1134 | Loong/data/doc/paper/2405.03085.md 1135 | Loong/data/doc/paper/2405.03097.md 1136 | Loong/data/doc/paper/2405.03133.md 1137 | Loong/data/doc/paper/2405.03456.md 1138 | Loong/data/doc/paper/2405.03488.md 1139 | Loong/data/doc/paper/2405.03547.md 1140 | Loong/data/doc/paper/2405.03644.md 1141 | Loong/data/doc/paper/2405.03710.md 1142 | Loong/data/doc/paper/2405.03917.md 1143 | Loong/data/doc/paper/2405.04028.md 1144 | Loong/data/doc/paper/2405.04237.md 1145 | Loong/data/doc/paper/2405.04434.md 1146 | Loong/data/doc/paper/2405.04437.md 1147 | Loong/data/doc/paper/2405.04497.md 1148 | Loong/data/doc/paper/2405.04781.md 1149 | Loong/data/doc/paper/2405.05136.md 1150 | Loong/data/doc/paper/2405.05465.md 1151 | Loong/data/doc/paper/2405.05824.md 1152 | Loong/data/doc/paper/2405.05885.md 1153 | Loong/data/doc/paper/2405.05904.md 1154 | Loong/data/doc/paper/2405.05945.md 1155 | Loong/data/doc/paper/2405.05950.md 1156 | Loong/data/doc/paper/2405.05955.md 1157 | Loong/data/doc/paper/2405.06093.md 1158 | Loong/data/doc/paper/2405.06107.md 1159 | Loong/data/doc/paper/2405.06161.md 1160 | Loong/data/doc/paper/2405.06219.md 1161 | Loong/data/doc/paper/2405.06331.md 1162 | Loong/data/doc/paper/2405.06856.md 1163 | Loong/data/doc/paper/2405.07435.md 1164 | 
Loong/data/doc/paper/2405.07437.md 1165 | Loong/data/doc/paper/2405.07460.md 1166 | Loong/data/doc/paper/2405.07482.md 1167 | Loong/data/doc/paper/2405.07518.md 1168 | Loong/data/doc/paper/2405.07628.md 1169 | Loong/data/doc/paper/2405.07657.md 1170 | Loong/data/doc/paper/2405.07745.md 1171 | Loong/data/doc/paper/2405.07761.md 1172 | Loong/data/doc/paper/2405.07764.md 1173 | Loong/data/doc/paper/2405.07770.md 1174 | Loong/data/doc/paper/2405.07898.md 1175 | Loong/data/doc/paper/2405.07960.md 1176 | Loong/data/doc/paper/2405.08054.md 1177 | Loong/data/doc/paper/2405.08135.md 1178 | Loong/data/doc/paper/2405.08289.md 1179 | Loong/data/doc/paper/2405.08480.md 1180 | Loong/data/doc/paper/2405.08514.md 1181 | Loong/data/doc/paper/2405.08542.md 1182 | Loong/data/doc/paper/2405.08555.md 1183 | Loong/data/doc/paper/2405.08582.md 1184 | Loong/data/doc/paper/2405.08745.md 1185 | Loong/data/doc/paper/2405.08748.md 1186 | Loong/data/doc/paper/2405.08813.md 1187 | Loong/data/doc/paper/2405.08839.md 1188 | Loong/data/doc/paper/2405.08863.md 1189 | Loong/data/doc/paper/2405.08888.md 1190 | Loong/data/doc/paper/2405.08908.md 1191 | Loong/data/doc/paper/2405.08920.md 1192 | Loong/data/doc/paper/2405.08981.md 1193 | Loong/data/doc/paper/2405.09115.md 1194 | Loong/data/doc/paper/2405.09117.md 1195 | Loong/data/doc/paper/2405.09142.md 1196 | Loong/data/doc/paper/2405.09204.md 1197 | Loong/data/doc/paper/2405.09251.md 1198 | Loong/data/doc/paper/2405.09255.md 1199 | Loong/data/doc/paper/2405.09266.md 1200 | Loong/data/doc/paper/2405.09276.md 1201 | Loong/data/doc/paper/2405.09285.md 1202 | Loong/data/doc/paper/2405.09306.md 1203 | Loong/data/doc/paper/2405.09330.md 1204 | Loong/data/doc/paper/2405.09341.md 1205 | Loong/data/doc/paper/2405.09395.md 1206 | Loong/data/doc/paper/2405.09711.md 1207 | Loong/data/doc/paper/2405.10150.md 1208 | Loong/data/doc/paper/2405.10480.md 1209 | Loong/data/doc/paper/2405.10516.md 1210 | Loong/data/doc/paper/2405.10632.md 1211 | 
Loong/data/doc/paper/2405.11120.md 1212 | Loong/data/doc/paper/2405.11537.md 1213 | Loong/data/doc/paper/2405.11612.md 1214 | Loong/data/doc/paper/2405.11647.md 1215 | Loong/data/doc/paper/2405.11804.md 1216 | Loong/data/doc/paper/2405.12523.md 1217 | Loong/data/doc/paper/2405.12532.md 1218 | Loong/data/doc/paper/2405.12648.md 1219 | Loong/data/doc/paper/2405.12750.md 1220 | Loong/data/doc/paper/2405.12819.md 1221 | Loong/data/doc/paper/2405.12910.md 1222 | Loong/data/doc/paper/2405.12933.md 1223 | Loong/data/doc/paper/2405.12979.md 1224 | Loong/data/doc/paper/2405.12999.md 1225 | Loong/data/doc/paper/2405.13050.md 1226 | Loong/data/doc/paper/2405.13816.md 1227 | Loong/data/doc/paper/2405.13929.md 1228 | Loong/data/doc/paper/2405.13966.md 1229 | Loong/data/doc/paper/2405.14169.md 1230 | Loong/data/doc/paper/2405.14205.md 1231 | Loong/data/doc/paper/2405.14231.md 1232 | Loong/data/doc/paper/2405.14573.md 1233 | Loong/data/doc/paper/2405.14591.md 1234 | Loong/data/doc/paper/2405.14722.md 1235 | Loong/data/doc/paper/2405.14734.md 1236 | Loong/data/doc/paper/2405.14751.md 1237 | Loong/data/doc/paper/2405.14785.md 1238 | Loong/data/doc/paper/2405.14804.md 1239 | Loong/data/doc/paper/2405.14828.md 1240 | Loong/data/doc/paper/2405.14930.md 1241 | Loong/data/doc/paper/2405.14974.md 1242 | Loong/data/doc/paper/2405.15007.md 1243 | Loong/data/doc/paper/2405.15032.md 1244 | Loong/data/doc/paper/2405.15116.md 1245 | Loong/data/doc/paper/2405.15130.md 1246 | Loong/data/doc/paper/2405.15160.md 1247 | Loong/data/doc/paper/2405.15194.md 1248 | Loong/data/doc/paper/2405.15287.md 1249 | Loong/data/doc/paper/2405.15306.md 1250 | Loong/data/doc/paper/2405.15307.md 1251 | Loong/data/doc/paper/2405.15341.md 1252 | Loong/data/doc/paper/2405.15349.md 1253 | Loong/data/doc/paper/2405.15604.md 1254 | Loong/data/doc/paper/2405.15614.md 1255 | Loong/data/doc/paper/2405.15638.md 1256 | Loong/data/doc/paper/2405.15652.md 1257 | Loong/data/doc/paper/2405.15756.md 1258 | 
Loong/data/doc/paper/2405.15984.md 1259 | Loong/data/doc/paper/2405.16009.md 1260 | Loong/data/doc/paper/2405.16247.md 1261 | Loong/data/doc/paper/2405.16276.md 1262 | Loong/data/doc/paper/2405.16388.md 1263 | Loong/data/doc/paper/2405.16406.md 1264 | Loong/data/doc/paper/2405.16444.md 1265 | Loong/data/doc/paper/2405.16510.md 1266 | Loong/data/doc/paper/2405.16528.md 1267 | Loong/data/doc/paper/2405.16533.md 1268 | Loong/data/doc/paper/2405.16661.md 1269 | Loong/data/doc/paper/2405.16681.md 1270 | Loong/data/doc/paper/2405.16714.md 1271 | Loong/data/doc/paper/2405.16783.md 1272 | Loong/data/doc/paper/2405.16821.md 1273 | Loong/data/doc/paper/2405.16833.md 1274 | Loong/data/doc/paper/2405.16847.md 1275 | Loong/data/doc/paper/2405.16908.md 1276 | Loong/data/doc/paper/2405.17051.md 1277 | Loong/data/doc/paper/2405.17147.md 1278 | Loong/data/doc/paper/2405.17220.md 1279 | Loong/data/doc/paper/2405.17233.md 1280 | Loong/data/doc/paper/2405.17249.md 1281 | Loong/data/doc/paper/2405.17374.md 1282 | Loong/data/doc/paper/2405.17386.md 1283 | Loong/data/doc/paper/2405.17438.md 1284 | Loong/data/doc/paper/2405.17512.md 1285 | Loong/data/doc/paper/2405.17602.md 1286 | Loong/data/doc/paper/2405.17653.md 1287 | Loong/data/doc/paper/2405.17741.md 1288 | Loong/data/doc/paper/2405.17820.md 1289 | Loong/data/doc/paper/2405.17915.md 1290 | Loong/data/doc/paper/2405.17927.md 1291 | Loong/data/doc/paper/2405.17935.md 1292 | Loong/data/doc/paper/2405.17950.md 1293 | Loong/data/doc/paper/2405.17969.md 1294 | Loong/data/doc/paper/2405.18027.md 1295 | Loong/data/doc/paper/2405.18111.md 1296 | Loong/data/doc/paper/2405.18137.md 1297 | Loong/data/doc/paper/2405.18193.md 1298 | Loong/data/doc/paper/2405.18208.md 1299 | Loong/data/doc/paper/2405.18272.md 1300 | Loong/data/doc/paper/2405.18320.md 1301 | Loong/data/doc/paper/2405.18348.md 1302 | Loong/data/doc/paper/2405.18392.md 1303 | Loong/data/doc/paper/2405.18415.md 1304 | Loong/data/doc/paper/2405.18573.md 1305 | 
Loong/data/doc/paper/2405.18628.md 1306 | Loong/data/doc/paper/2405.18718.md 1307 | Loong/data/doc/paper/2405.18886.md 1308 | Loong/data/doc/paper/2405.18922.md 1309 | Loong/data/doc/paper/2405.19010.md 1310 | Loong/data/doc/paper/2405.19086.md 1311 | Loong/data/doc/paper/2405.19103.md 1312 | Loong/data/doc/paper/2405.19107.md 1313 | Loong/data/doc/paper/2405.19226.md 1314 | Loong/data/doc/paper/2405.19261.md 1315 | Loong/data/doc/paper/2405.19262.md 1316 | Loong/data/doc/paper/2405.19313.md 1317 | Loong/data/doc/paper/2405.19316.md 1318 | Loong/data/doc/paper/2405.19323.md 1319 | Loong/data/doc/paper/2405.19327.md 1320 | Loong/data/doc/paper/2405.19425.md 1321 | Loong/data/doc/paper/2405.19524.md 1322 | Loong/data/doc/paper/2405.19534.md 1323 | Loong/data/doc/paper/2405.19550.md 1324 | Loong/data/doc/paper/2405.19563.md 1325 | Loong/data/doc/paper/2405.19616.md 1326 | Loong/data/doc/paper/2405.19668.md 1327 | Loong/data/doc/paper/2405.19715.md 1328 | Loong/data/doc/paper/2405.19716.md 1329 | Loong/data/doc/paper/2405.19806.md 1330 | Loong/data/doc/paper/2405.19846.md 1331 | Loong/data/doc/paper/2405.19888.md 1332 | Loong/data/doc/paper/2405.19973.md 1333 | Loong/data/doc/paper/2405.20092.md 1334 | Loong/data/doc/paper/2405.20099.md 1335 | Loong/data/doc/paper/2405.20175.md 1336 | Loong/data/doc/paper/2405.20215.md 1337 | Loong/data/doc/paper/2405.20216.md 1338 | Loong/data/doc/paper/2405.20304.md 1339 | Loong/data/doc/paper/2405.20314.md 1340 | Loong/data/doc/paper/2405.20347.md 1341 | Loong/data/doc/paper/2405.20362.md 1342 | Loong/data/doc/paper/2405.20404.md 1343 | Loong/data/doc/paper/2405.20512.md 1344 | Loong/data/doc/paper/2405.20625.md 1345 | Loong/data/doc/paper/2405.20703.md 1346 | Loong/data/doc/paper/2405.20773.md 1347 | Loong/data/doc/paper/2405.20774.md 1348 | Loong/data/doc/paper/2405.20778.md 1349 | Loong/data/doc/paper/2405.20830.md 1350 | Loong/data/doc/paper/2405.20947.md 1351 | Loong/data/doc/paper/2405.20974.md 1352 | 
Loong/data/doc/paper/2405.21018.md 1353 | Loong/data/doc/paper/2405.21040.md 1354 | Loong/data/doc/paper/2405.21046.md 1355 | Loong/output/qwen/loong_evaluate.jsonl 1356 | Loong/output/qwen/loong_generate.jsonl 1357 | -------------------------------------------------------------------------------- /Loong/LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2023 Alibaba Cloud 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /Loong/README.md: -------------------------------------------------------------------------------- 1 | please download data from https://drive.google.com/file/d/1WnoiR0pADg_DEvfrPUe7bhzjnqAbahBH/view?usp=sharing 2 | 3 | input datas are in Loong/data/loong_process.jsonl 4 | output datas are in Loong/output/qwen 5 | 6 | ```cd src && bash run.sh``` can get results as in the paper -------------------------------------------------------------------------------- /Loong/requirements.txt: -------------------------------------------------------------------------------- 1 | fastapi==0.111.0 2 | numpy==2.0.0 3 | openai==1.35.7 4 | pandas==2.2.2 5 | pydantic==2.8.0 6 | PyYAML==6.0.1 7 | Requests==2.32.3 8 | sentencepiece==0.2.0 9 | safetensors==0.4.3 10 | sse_starlette==2.1.2 11 | tiktoken==0.7.0 12 | torch==2.3.0 13 | tqdm==4.66.4 14 | transformers==4.41.2 15 | uvicorn==0.30.1 16 | vllm==0.5.0.post1 17 | xformers==0.0.26.post1 18 | vllm-flash-attn==2.5.9 19 | anthropic==0.30.1 20 | google-generativeai==0.7.1 
-------------------------------------------------------------------------------- /Loong/src/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ARGS=() 4 | # MODEL 5 | MODEL="qwen" 6 | MODEL_CONFIG="$MODEL.yaml" 7 | EVAL_MODEL_CONFIG="gpt4o.yaml" 8 | # INPUT PATH 9 | DOC_PATH="../data/doc" 10 | INPUT_PATH="../data/loong.jsonl" 11 | MODEL_CONFIG_DIR="../config/models" 12 | # OUTPUT PATH 13 | OUTPUT_PROCESS_PATH="../data/loong_process.jsonl" 14 | OUTPUT_PATH="../output/$MODEL/loong_generate.jsonl" 15 | OUTPUT_EVALUATE_PATH="../output/$MODEL/loong_evaluate.jsonl" 16 | # ARGUMENTS 17 | MAX_LENGTH="128000" # According to the context window of llm. The value of config takes precedence 18 | PROCESS_NUM_GEN="3" # Concurrency number of model generate 19 | PROCESS_NUM_EVAL="20" # Concurrency number of model eval 20 | DEBUG_NUM="-1" # -1 means all data 21 | 22 | while [[ $# -gt 0 ]]; do 23 | case "$1" in 24 | --model) 25 | shift 26 | MODEL="$1" 27 | MODEL_CONFIG="$MODEL.yaml" 28 | OUTPUT_PATH="../output/$MODEL/loong_generate.jsonl" 29 | OUTPUT_EVALUATE_PATH="../output/$MODEL/loong_evaluate.jsonl" 30 | ;; 31 | --continue_gen) 32 | ARGS+="--continue_gen" 33 | ;; 34 | *) 35 | echo "unknown parameter: $1" 36 | exit 1 37 | ;; 38 | esac 39 | shift 40 | done 41 | echo "MODEL=[$MODEL], MODEL_CONFIG=[$MODEL_CONFIG]" 42 | 43 | ARGS+=( 44 | "--models" "$MODEL_CONFIG" 45 | "--eval_model" "$EVAL_MODEL_CONFIG" 46 | "--debug_num" "$DEBUG_NUM" 47 | "--doc_path" "$DOC_PATH" 48 | "--input_path" "$INPUT_PATH" 49 | "--output_process_path" "$OUTPUT_PROCESS_PATH" 50 | "--output_path" "$OUTPUT_PATH" 51 | "--evaluate_output_path" "$OUTPUT_EVALUATE_PATH" 52 | "--max_length" "$MAX_LENGTH" 53 | "--model_config_dir" "$MODEL_CONFIG_DIR" 54 | "--process_num_gen" "$PROCESS_NUM_GEN" 55 | "--process_num_eval" "$PROCESS_NUM_EVAL" 56 | ) 57 | 58 | # Execute in order 59 | # python step1_load_data.py "${ARGS[@]}" 60 | # python step2_model_generate.py 
"${ARGS[@]}" 61 | python step3_model_evaluate.py "${ARGS[@]}" 62 | python step4_cal_metric.py "${ARGS[@]}" 63 | -------------------------------------------------------------------------------- /Loong/src/step1_load_data.py: -------------------------------------------------------------------------------- 1 | import json 2 | import random 3 | import os 4 | from utils.args import parse_arguments 5 | from utils.prompt import get_generate_prompts 6 | from utils.util import count_lines, logger 7 | 8 | 9 | if __name__ == '__main__': 10 | args = parse_arguments() 11 | random.seed(args.seed) 12 | logger.debug(f"args: {args}") 13 | ## step1 14 | if not os.path.exists(args.output_process_path) or (args.debug_num > 0 and count_lines(args.output_process_path) != args.debug_num) or (args.debug_num < 0 and count_lines(args.output_process_path) != count_lines(args.input_path)): 15 | generate_prompts = get_generate_prompts(args) 16 | 17 | with open(args.output_process_path, 'w') as f: 18 | for p in generate_prompts: 19 | f.write(json.dumps(p, ensure_ascii=False, separators=(',', ':')) + "\n") 20 | else: 21 | logger.debug(f"Path exist: {args.output_process_path}") 22 | -------------------------------------------------------------------------------- /Loong/src/step2_model_generate.py: -------------------------------------------------------------------------------- 1 | import json 2 | import random 3 | import os 4 | from utils.args import parse_arguments 5 | from utils.config import load 6 | from utils.generate import generate 7 | from utils.util import create_path, continue_gen, logger 8 | 9 | 10 | if __name__ == '__main__': 11 | args = parse_arguments() 12 | random.seed(args.seed) 13 | 14 | config = load(open(f"{args.model_config_dir}/{args.models}")) 15 | # The value of config takes precedence 16 | if config.get('run_args', {}).get('max_length', None): 17 | args.max_length = config.get('run_args', {}).get('max_length', None) 18 | logger.debug(f"config value: 
max_length={args.max_length} takes precedences") 19 | 20 | tag = "generate_response" 21 | 22 | with open(args.output_process_path, "r") as f: 23 | generate_data = [json.loads(item.strip()) for item in f.readlines()] 24 | 25 | if not os.path.exists(args.output_path): 26 | create_path(args.output_path) 27 | # api 28 | generate(generate_data, config, args.output_path, args.process_num_gen, tag=tag) 29 | else: 30 | 31 | if args.continue_gen: 32 | continue_generate_data = continue_gen(args.output_path, generate_data, tag=tag) 33 | # api 34 | generate(continue_generate_data, config, args.output_path, args.process_num_gen, tag=tag) 35 | else: 36 | logger.debug(f"Path exist: {args.output_path}") 37 | -------------------------------------------------------------------------------- /Loong/src/step3_model_evaluate.py: -------------------------------------------------------------------------------- 1 | import os 2 | from utils.args import parse_arguments 3 | from utils.prompt import get_evaluate_prompts 4 | from utils.generate import generate 5 | from utils.util import create_path, continue_gen, logger 6 | from utils.config import load 7 | 8 | 9 | if __name__ == '__main__': 10 | args = parse_arguments() 11 | 12 | eval_config = load(open(f"{args.model_config_dir}/{args.eval_model}")) 13 | evaluate_prompts = get_evaluate_prompts(args, tag="generate_response") 14 | tag = "eval_response" 15 | 16 | if not os.path.exists(args.evaluate_output_path): 17 | create_path(args.evaluate_output_path) 18 | generate(evaluate_prompts, eval_config, args.evaluate_output_path, args.process_num_eval, tag=tag) 19 | else: 20 | if args.continue_gen: 21 | continue_evaluate_prompts = continue_gen(args.evaluate_output_path, evaluate_prompts, tag=tag) 22 | generate(continue_evaluate_prompts, eval_config, args.evaluate_output_path, args.process_num_eval, 23 | tag=tag) 24 | else: 25 | logger.debug(f"Path exist: {args.evaluate_output_path}") 26 | 
-------------------------------------------------------------------------------- /Loong/src/step4_cal_metric.py: -------------------------------------------------------------------------------- 1 | from utils.args import parse_arguments 2 | from utils.metric import cal_metric 3 | 4 | 5 | if __name__ == '__main__': 6 | args = parse_arguments() 7 | 8 | print("------------------ All metrics: ------------------") 9 | cal_metric(args, tag="eval_response") 10 | print("") 11 | 12 | print(f"------------------ Level metrics: ------------------") 13 | for level in [1, 2, 3, 4]: 14 | print(f"------------------ Level {level} metrics: ------------------") 15 | cal_metric(args, tag="eval_response", level=level) 16 | print("") 17 | 18 | print(f"------------------ Set metrics: ------------------") 19 | for set in [1, 2, 3, 4]: 20 | print(f"------------------ Set {set} metrics ------------------") 21 | for level in [1, 2, 3, 4]: 22 | cal_metric(args, tag="eval_response", set=set, level=level) 23 | cal_metric(args, tag="eval_response", set=set, level=None) 24 | print("") 25 | -------------------------------------------------------------------------------- /Loong/src/test.sh: -------------------------------------------------------------------------------- 1 | 2 | MODEL_CONFIG="1" 3 | ARGS=( 4 | "--models" "$MODEL_CONFIG" 5 | "--eval_model" "$MODEL_CONFIG" 6 | "--debug_num" "$MODEL_CONFIG" 7 | "--doc_path" "$MODEL_CONFIG" 8 | "--input_path" "$MODEL_CONFIG" 9 | "--output_process_path" "$MODEL_CONFIG" 10 | "--output_path" "$MODEL_CONFIG" 11 | "--evaluate_output_path" "$MODEL_CONFIG" 12 | "--max_length" "$MODEL_CONFIG" 13 | "--model_config_dir" "$MODEL_CONFIG" 14 | "--process_num_gen" "$MODEL_CONFIG" 15 | "--process_num_eval" "$MODEL_CONFIG" 16 | "--rag" 17 | "--tmp" "$MODEL_CONFIG" 18 | ) 19 | 20 | # Check whether the incoming parameters contain --continue_gen 21 | for param in "$@"; do 22 | if [ "$param" == "--continue_gen" ]; then 23 | ARGS+=("--continue_gen") 24 | fi 25 | done 26 | 
27 | echo "${ARGS[@]}" -------------------------------------------------------------------------------- /Loong/src/utils/args.py: -------------------------------------------------------------------------------- 1 | #!/user/bin/env python 2 | # coding=utf-8 3 | import argparse 4 | 5 | 6 | def parse_arguments(): 7 | parser = argparse.ArgumentParser(description='args for evaluate.py') 8 | parser.add_argument("--models", "-c", default="gpt4o.yaml") 9 | parser.add_argument("--eval_model", type=str, default="gpt4.yaml") 10 | parser.add_argument('--debug_num', type=int, default=15, help="Control the number of generated items. If <0, it means using all data") 11 | parser.add_argument('--shuffle_prompts', action="store_true") 12 | parser.add_argument('--debug_level', type=str, default="1,2,3,4", help="Represents the level to be evaluated, eg: 1,2 or 3") 13 | parser.add_argument('--debug_set', type=str, default="1,2,3,4", help="Represents the set level to be evaluated, eg: 1,2 or 3") 14 | parser.add_argument('--process_num_gen', type=int, default=10) 15 | parser.add_argument('--process_num_eval', type=int, default=10) 16 | parser.add_argument('--seed', type=int, default=1000000007) 17 | parser.add_argument('--ratio', type=float, default=1) 18 | parser.add_argument('--doc_path', type=str, default='./doc') 19 | parser.add_argument('--input_path', type=str, default='../data/loong.jsonl') 20 | parser.add_argument('--output_process_path', type=str, default='../data/loong_process.jsonl') 21 | parser.add_argument('--output_path', type=str, default='../output/loong_generate.jsonl') 22 | parser.add_argument('--evaluate_output_path', type=str, default='../output/loong_evaluate.jsonl') 23 | parser.add_argument('--max_length', type=int, default=300000) 24 | parser.add_argument('--domain', type=str, default='', help='financial, paper, legal') 25 | parser.add_argument('--add_noise', action="store_true", help="A boolean flag that defaults to False") 26 | parser.add_argument('--rag', 
action="store_true", help="whether to use rag model") 27 | parser.add_argument('--rag_num', type=int, help="recall top n") 28 | parser.add_argument('--continue_gen', action="store_true", help="whether to continue_generate from exist file") 29 | parser.add_argument('--model_config_dir', type=str, default='../config/models') 30 | 31 | args = parser.parse_args() 32 | return args 33 | 34 | -------------------------------------------------------------------------------- /Loong/src/utils/config.py: -------------------------------------------------------------------------------- 1 | #!/user/bin/env python 2 | # coding=utf-8 3 | ''' 4 | @project : loong 5 | @author : fucheng 6 | #@file : config.py 7 | #@ide : PyCharm 8 | #@time : 2024-06-02 13:39:36 9 | ''' 10 | import functools 11 | import os 12 | from typing import Any, Dict 13 | 14 | import yaml 15 | 16 | class ExtLoaderMeta(type): 17 | def __new__(metacls: Any, __name__: str, __bases__: Any, __dict__: Dict) -> Any: 18 | """Add include constructer to class.""" 19 | 20 | # register the include constructor on the class 21 | cls = super().__new__(metacls, __name__, __bases__, __dict__) 22 | cls.add_constructor("!include", cls.construct_include) 23 | 24 | return cls 25 | 26 | 27 | class ExtLoader(yaml.Loader, metaclass=ExtLoaderMeta): 28 | """YAML Loader with `!include` constructor.""" 29 | 30 | def __init__(self, stream: Any) -> None: 31 | """Initialise Loader.""" 32 | 33 | try: 34 | self._root = os.path.split(stream.name)[0] 35 | except AttributeError: 36 | self._root = os.path.curdir 37 | 38 | super().__init__(stream) 39 | 40 | def construct_include(self, node: Any) -> str: 41 | """Include file referenced at node.""" 42 | 43 | filename = os.path.abspath( 44 | os.path.join(self._root, str(self.construct_scalar(node))) 45 | ) 46 | extension = os.path.splitext(filename)[1].lstrip(".") 47 | 48 | with open(filename, "r") as f: 49 | if extension in ("yaml", "yml"): 50 | return yaml.load(f, ExtLoader) 51 | else: 52 | return 
"".join(f.readlines()) 53 | 54 | 55 | # Set MyLoader as default. 56 | load = functools.partial(yaml.load, Loader=ExtLoader) 57 | -------------------------------------------------------------------------------- /Loong/src/utils/generate.py: -------------------------------------------------------------------------------- 1 | import json 2 | from tqdm import tqdm 3 | import multiprocessing 4 | import requests 5 | import numpy as np 6 | from functools import partial 7 | from decimal import Decimal 8 | import numpy as np 9 | import time 10 | from openai import OpenAI 11 | from anthropic import Anthropic 12 | # import google.generativeai as genai 13 | 14 | class MyEncoder(json.JSONEncoder): 15 | def default(self, obj): 16 | if isinstance(obj, np.ndarray): 17 | return obj.tolist() 18 | elif isinstance(obj, bytes): 19 | try: 20 | return str(obj, encoding='utf-8') 21 | except: 22 | return str(obj, encoding='gbk') 23 | elif isinstance(obj, Decimal): 24 | return float(obj) 25 | # print(obj, type(obj)) 26 | return json.JSONEncoder.default(self, obj) 27 | 28 | 29 | def get_api_results(prompt_input, config): 30 | prompt = prompt_input['prompt'] 31 | 32 | if config['type'] == 'openai' or config['type'] == 'vllm': 33 | # client = OpenAI(api_key=config['args']['api_key'], 34 | # base_url=config['args']['api_url'] if config['args']['api_url']!='' else None) 35 | # try: 36 | # response = client.chat.completions.create( 37 | # messages=[{"role": "user","content": prompt}], 38 | # model=config['args']['api_name'], 39 | # temperature=config['run_args']['temperature'] 40 | # ) 41 | # return response.choices[0].message.content 42 | try: 43 | url = "http://47.88.8.18:8088/api/ask" 44 | headers = { 45 | "Content-Type": "application/json", 46 | "Authorization": "Bearer eyJ0eXAiOiJqd3QiLCJhbGciOiJIUzI1NiJ9.eyJ1c2VybmFtZSI6IjIzNzgzNiIsInBhc3N3b3JkIjoiMjM3ODM2MTIzIiwiZXhwIjoyMDMxMzc2MjA0fQ.Lz6IKLMUTWWT5isamrYTmbAcGNFpAqt87YFF2bynP3w" 47 | } 48 | raw_info = { 49 | "model": 
config['args']['api_name'], 50 | "messages": [{"role": "user", "content": prompt}], 51 | "temperature": config['run_args']['temperature'], 52 | } 53 | callback = requests.post(url, data=json.dumps(raw_info), headers=headers, timeout=(10000, 10000)) 54 | # print(callback) 55 | result = callback.json() 56 | # uid = "_".join(result['uid'].split('-')) 57 | # usage = result['data']['response']['usage'] 58 | # json.dump({'uid': uid, 'usage': usage}, open(f'/root/multiagent_doc2graph/Loong/token_num_statics/{uid}.json', 'w')) 59 | return result['data']['response']['choices'][0]['message']['content'] 60 | except Exception as e: 61 | print(e) 62 | return [] 63 | 64 | elif config['type'] == 'gemini': 65 | genai.configure(api_key=config['args']['api_key']) 66 | 67 | model = genai.GenerativeModel(name=config['args']['api_name']) 68 | try: 69 | response = model.generate_content(prompt, 70 | generation_config=genai.types.GenerationConfig( 71 | temperature=config['run_args']['temperature'])) 72 | return response.text 73 | except Exception as e: 74 | print(e) 75 | return [] 76 | 77 | elif config['type'] == 'claude': 78 | client = Anthropic(api_key=config['args']['api_key']) 79 | try: 80 | message = client.messages.create( 81 | messages=[{"role": "user", "content": prompt,}], 82 | model=config['args']['api_name'], 83 | ) 84 | return message.content 85 | except Exception as e: 86 | print(e) 87 | return [] 88 | 89 | elif config['type'] == 'http': 90 | headers = {"Content-Type": "application/json", 91 | "Authorization": config['args']['api_key']} 92 | raw_info = { 93 | "model": config['args']['api_name'], 94 | "messages": [{"role": "user", "content": prompt}], 95 | "n": 1} 96 | raw_info.update(config['run_args']) 97 | try: 98 | callback = requests.post(config['args']['api_url'], data=json.dumps(raw_info, cls=MyEncoder), headers=headers, 99 | timeout=(600, 600)) 100 | result = callback.json() 101 | # todo: customize the result 102 | return 
result['data']['response']['choices'][0]['message']['content'] 103 | except Exception as e: 104 | print(e) 105 | return [] 106 | 107 | else: 108 | raise f"type of {config['type']} is not valid" 109 | 110 | def fetch_api_result(prompt_input, config, max_retries=5): 111 | """Attempt to get a valid result from the API, with a maximum number of retries.""" 112 | for _ in range(max_retries): 113 | result = get_api_results(prompt_input, config) 114 | if result: 115 | return result 116 | # Sleep briefly to not hammer the API in case of errors or rate limits 117 | time.sleep(5) # Uncomment if needed 118 | return None 119 | 120 | 121 | def api(prompt, output_path, config, tag): 122 | response_content = fetch_api_result(prompt, config) 123 | result = prompt.copy() 124 | result[tag] = response_content or "" 125 | with open(output_path, 'a', encoding='utf-8') as fw: 126 | fw.write(json.dumps(result, ensure_ascii=False) + '\n') 127 | 128 | 129 | def generate(prompts, config, output_path, process_num, tag): 130 | func = partial(api, output_path=output_path, config=config, tag=tag) 131 | with multiprocessing.Pool(processes=process_num) as pool: 132 | for _ in tqdm(pool.imap(func, prompts), total=len(prompts)): 133 | pass -------------------------------------------------------------------------------- /Loong/src/utils/metric.py: -------------------------------------------------------------------------------- 1 | import re, json 2 | import numpy as np 3 | 4 | 5 | def extract_number(text): 6 | match = re.search(r'\[\[([0-9]*\.?[0-9]+)\]\]', text) 7 | if match: 8 | return float(match.group(1)) 9 | match = re.search(r'\[([0-9]*\.?[0-9]+)\]', text) 10 | if match: 11 | return float(match.group(1)) 12 | return None 13 | 14 | 15 | def failure_prompts(args, tag): 16 | eval_lines = open(args.old_evaluate_output_path).readlines() 17 | gen_lines = open(args.old_output_path).readlines() 18 | scores = [] 19 | effective_samples = [] 20 | no_effective_samples = [] 21 | for line in eval_lines: 22 
| line = json.loads(line.strip()) 23 | if not extract_number(line[tag]) or line['generate_response'] == "": 24 | no_effective_samples.append(line['id']) 25 | for line in gen_lines: 26 | line = json.loads(line.strip()) 27 | if line['id'] in no_effective_samples: 28 | effective_samples.append( 29 | {'id': line['id'], 'prompt': line['prompt'], 'question': line['question'], 'answer': line['answer']}) 30 | return effective_samples 31 | 32 | 33 | def cal_metric(args, tag, level=None, set=None): 34 | lines = open(args.evaluate_output_path).readlines() 35 | scores = [] 36 | effective_samples = [] 37 | no_effective_samples = [] 38 | for line in lines: 39 | line = json.loads(line.strip()) 40 | 41 | _level = line.get("level", None) 42 | _set = line.get("set", None) 43 | if level and _level and _level != level: 44 | continue 45 | if set and _set and _set != set: 46 | continue 47 | 48 | if extract_number(line[tag]) is not None: 49 | scores.append(extract_number(line[tag])) 50 | effective_samples.append(line) 51 | else: 52 | no_effective_samples.append(line['id']) 53 | 54 | num_full_marks = sum(1 for x in scores if x == 100) 55 | try: 56 | metric = (len(effective_samples) / len(lines), np.mean(scores), f"{num_full_marks}/{len(effective_samples)}", num_full_marks / len(effective_samples)) 57 | 58 | print(f"level: {level}, set: {set}, scoring_success_rate: {metric[0]:.2f} , avg_score: {metric[1]:.2f} , perfect_rate_calculation: {metric[2]} , perfect_rate: {metric[3]:.2f}") 59 | return metric 60 | 61 | except: 62 | print(f"level: {level}, set: {set}, scoring_success_rate:0.00, avg_score:0.00, perfect_rate_calculation:0/0, perfect_rate:0.00") 63 | return None -------------------------------------------------------------------------------- /Loong/src/utils/prompt.py: -------------------------------------------------------------------------------- 1 | import json 2 | from tqdm import tqdm 3 | import random 4 | import uuid 5 | from pathlib import Path 6 | import glob 7 | from 
.token_length import token_length 8 | import io 9 | 10 | 11 | file_handle_cache = {} 12 | 13 | def close_cached_files(): 14 | for file, handle in file_handle_cache.items(): 15 | if isinstance(handle, io.IOBase): 16 | handle.close() 17 | file_handle_cache.clear() 18 | 19 | 20 | def get_content(args, item, doc_name, idx): 21 | global file_handle_cache 22 | doc_type, doc_level = item['type'], item['level'] 23 | docPath = Path(args.doc_path) / doc_type 24 | 25 | if doc_type == 'financial': 26 | if str(doc_level).strip() != '4': 27 | _file = glob.glob(f"{docPath}/*2024-{doc_name}*.txt")[0] 28 | else: 29 | _file = glob.glob(f"{docPath}/*{doc_name}*.txt")[0] 30 | try: 31 | with open(_file, 'r') as txt_file: 32 | _doc_name = Path(_file).stem.split('-')[-1] 33 | # doc = f"<标题起始符>《{_doc_name}》<标题终止符>\n" + txt_file.read() + "\n\n" 34 | doc = f"《{_doc_name}》\n" + txt_file.read() + "\n\n" 35 | except IOError: 36 | print(f"Error: File {_file} could not be opened.") 37 | 38 | elif doc_type == 'paper': 39 | path = docPath / doc_name 40 | try: 41 | with open(path, 'r') as txt_file: 42 | content = txt_file.read() 43 | doc_name = content.split('\n', 1)[0].strip("#").strip() 44 | # doc = f"<标题起始符>{doc_name}<标题终止符>\n" + content + "\n\n" 45 | doc = f"{doc_name}\n" + content + "\n\n" 46 | except IOError: 47 | print(f"Error: File {path} could not be opened.") 48 | 49 | elif doc_type == 'legal': 50 | _file = docPath / "legal.json" 51 | if _file in file_handle_cache: 52 | legal_js = file_handle_cache[_file] 53 | # txt_file.seek(0) 54 | else: 55 | with open(_file, 'r') as txt_file: 56 | legal_js = json.load(txt_file) 57 | file_handle_cache[_file] = legal_js 58 | 59 | if doc_level == 4 and ('阅读以上判决文书,我将给你若干份判决结果:' in item['instruction']): 60 | content = legal_js[doc_name]["content"] 61 | else: 62 | content = legal_js[doc_name]["content"] + legal_js[doc_name]["result"] 63 | # doc = f"<标题起始符>《判决文书{idx + 1}》<标题终止符>\n" + content + "\n\n" 64 | doc = f"《判决文书{idx + 1}》\n" + content + "\n\n" 65 | 66 
| else: 67 | raise "doc_type not valid!" 68 | 69 | return doc 70 | 71 | 72 | def get_contents(args, item, doc_names): 73 | contents = [] 74 | for idx, doc_name in enumerate(doc_names): 75 | content = get_content(args, item, doc_name, idx) 76 | contents.append(content) 77 | return contents 78 | 79 | 80 | def get_doc_str(args, item, prompt_template): 81 | len_prompt_template = token_length(prompt_template) - token_length("{docs}") 82 | is_shuffle = item.get("shuffle_doc", True) 83 | 84 | docs = item['doc'] if not args.rag else item["recall_chunks"][:args.rag_num] 85 | docs_list = [] 86 | 87 | if args.rag: 88 | for doc in docs: 89 | if len_prompt_template + sum(token_length(s) for s in docs_list) + token_length(doc) > args.max_length: 90 | continue 91 | docs_list.append(doc) 92 | else: 93 | # read content from given doc names 94 | contents = get_contents(args, item, docs) 95 | # shuffle 96 | if is_shuffle and item['type'] == 'financial': 97 | random.shuffle(contents) 98 | for content in contents: 99 | if len_prompt_template + sum(token_length(s) for s in docs_list) + token_length(content) > args.max_length: 100 | continue 101 | docs_list.append(content) 102 | 103 | # shuffle 104 | if is_shuffle: 105 | random.shuffle(docs_list) 106 | docs_str = "".join(docs_list) 107 | return docs_str 108 | 109 | 110 | def get_generate_prompt(args, item): 111 | replace_dict = {"{question}": item['question'], "{instruction}": item['instruction']} 112 | prompt_template = item['prompt_template'] 113 | for k, v in replace_dict.items(): 114 | prompt_template = prompt_template.replace(k, v) 115 | doc_str = get_doc_str(args, item, prompt_template) 116 | prompt_template = prompt_template.replace("{docs}", doc_str) 117 | item['docs'] = doc_str 118 | item['prompt'] = prompt_template 119 | return item 120 | 121 | 122 | def get_generate_prompts(args): 123 | prompts = [] 124 | with open(args.input_path, 'r') as file: 125 | lines = file.readlines() 126 | 127 | if args.shuffle_prompts: 128 | 
random.shuffle(lines) 129 | # debug num samples 130 | if args.debug_num and args.debug_num > 0: 131 | lines = lines[:args.debug_num] 132 | if args.ratio != 1: 133 | random.shuffle(lines) 134 | lines = lines[int(len(prompts) * args.ratio):] 135 | 136 | for line in tqdm(lines, desc="gen_prompts"): 137 | item = json.loads(line) 138 | doc_type, set_level, level = item['type'], item['set'], item['level'] 139 | # filter 140 | if args.domain.strip(): 141 | domains = args.domain.strip().split(",") 142 | domains = list(map(lambda x: x.strip(), domains)) 143 | if doc_type not in domains: 144 | continue 145 | if args.debug_set.strip(): 146 | sets = args.debug_set.strip().split(",") 147 | sets = list(map(int, sets)) 148 | if set_level not in sets: 149 | continue 150 | if args.debug_level.strip(): 151 | levels = args.debug_level.strip().split(",") 152 | levels = list(map(int, levels)) 153 | if level not in levels: 154 | continue 155 | 156 | prompt = get_generate_prompt(args, item) 157 | prompts.append(prompt) 158 | close_cached_files() 159 | return prompts 160 | 161 | 162 | def get_evaluate_prompts(args, tag): 163 | prompt = '''[Question] 164 | {} 165 | 166 | [Gold Answer] 167 | {} 168 | 169 | [The Start of Assistant's Predicted Answer] 170 | {} 171 | [The End of Assistant's Predicted Answer] 172 | 173 | [System] 174 | We would like to request your feedback on the performance of the AI assistant in response to the user question displayed above according to the gold answer. Please use the following listed aspects and their descriptions as evaluation criteria: 175 | - Accuracy and Hallucinations: The assistant's answer is semantically consistent with the gold answer; The numerical value and order need to be accurate, and there should be no hallucinations. 176 | - Completeness: Referring to the reference answers, the assistant's answer should contain all the key points needed to answer the user's question; further elaboration on these key points can be omitted. 
177 | Please rate whether this answer is suitable for the question. Please note that the gold answer can be considered as a correct answer to the question. 178 | 179 | The assistant receives an overall score on a scale of 1 to 100, where a higher score indicates better overall performance. 180 | Please note that if the assistant's answer and the gold answer fully meet the above criteria, its overall rating should be the full marks (100). 181 | Please first provide a comprehensive explanation of your evaluation, avoiding any potential bias. 182 | Then, output a line indicating the score of the Assistant. 183 | 184 | PLEASE OUTPUT WITH THE FOLLOWING FORMAT, WHERE THE SCORE IS A SCALE OF 1 TO 100 BY STRICTLY FOLLOWING THIS FORMAT: "[[score]]", FOR EXAMPLE "Rating: [[100]]": 185 | 186 | Evaluation evidence: your evluation explanation here, no more than 100 words 187 | Rating: [[score]] 188 | 189 | 190 | Now, start your evaluation:''' 191 | prompts = [] 192 | lines = open(args.output_path).readlines() 193 | for line in lines: 194 | line = json.loads(line.strip()) 195 | line.pop('docs', '') 196 | doc_type, question, instruction = line['type'], line['question'], line['instruction'] 197 | prompt_template = line['prompt_template'] 198 | if doc_type != "paper": 199 | prompt_template = prompt_template.replace("{docs}", "") 200 | question = prompt_template.replace("{question}", question).replace("{instruction}", instruction) 201 | answer = line['answer'] 202 | predict = line[tag] 203 | line['prompt'] = prompt.format(question, answer, predict) 204 | prompts.append(line) 205 | return prompts 206 | -------------------------------------------------------------------------------- /Loong/src/utils/token_length.py: -------------------------------------------------------------------------------- 1 | import tiktoken 2 | encoding = tiktoken.encoding_for_model("gpt-3.5-turbo") 3 | def token_length(text): 4 | return len(encoding.encode(text, disallowed_special=())) 5 | 6 | if __name__ == 
"__main__": 7 | res = token_length("{docs}") 8 | print(res) -------------------------------------------------------------------------------- /Loong/src/utils/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import logging 4 | from colorlog import ColoredFormatter 5 | 6 | 7 | def count_lines(file_path): 8 | with open(file_path, 'r', encoding='utf-8') as file: 9 | return sum(1 for _ in file) 10 | 11 | 12 | def create_path(file_path): 13 | directory = os.path.dirname(file_path) 14 | if not os.path.exists(directory): 15 | os.makedirs(directory) 16 | 17 | 18 | def continue_gen(input_path, gen_data, tag): 19 | seen_id = dict() 20 | with open(input_path, 'r') as f: 21 | for item in f.readlines(): 22 | js = json.loads(item.strip()) 23 | if js[tag]: 24 | seen_id[js['id']] = js 25 | rewrite_data, continue_generate_data = [], [] 26 | seen_rewrite = set() 27 | for item in gen_data: 28 | _id = item['id'] 29 | if _id in seen_rewrite: 30 | continue 31 | if _id not in seen_id: 32 | continue_generate_data.append(item) 33 | else: 34 | rewrite_data.append(seen_id[_id]) 35 | # dedup 36 | seen_rewrite.add(_id) 37 | with open(input_path, 'w') as f: 38 | for item in rewrite_data: 39 | f.write(json.dumps(item, ensure_ascii=False) + '\n') 40 | print(f"continue_gen: input_path={input_path}, rewrite_data_num={len(rewrite_data)}, tag={tag}") 41 | return continue_generate_data 42 | 43 | 44 | 45 | def setup_logger(name='Loong', level=logging.DEBUG): 46 | # create 47 | logger = logging.getLogger(name) 48 | logger.setLevel(level) 49 | 50 | # Avoid adding repeatedly 51 | if not logger.hasHandlers(): 52 | # log level 53 | console_handler = logging.StreamHandler() 54 | console_handler.setLevel(level) 55 | 56 | # color 57 | formatter = ColoredFormatter( 58 | '%(log_color)s%(asctime)s (%(name)s - %(levelname)s) %(message)s', 59 | datefmt='%Y-%m-%d %H:%M:%S', 60 | log_colors={ 61 | 'DEBUG': 'green', 62 | 'INFO': 'green', 63 | 
'WARNING': 'yellow', 64 | 'ERROR': 'red', 65 | 'CRITICAL': 'bold_red', 66 | } 67 | ) 68 | 69 | # 将格式设置到处理器 70 | console_handler.setFormatter(formatter) 71 | 72 | # 将处理器添加到记录器 73 | logger.addHandler(console_handler) 74 | 75 | return logger 76 | 77 | logger = setup_logger() -------------------------------------------------------------------------------- /Loong/src/vllm_example.sh: -------------------------------------------------------------------------------- 1 | # For Qwen2, you can enable the long-context capabilities by following these steps. 2 | # modify the config.json file by including the below snippet: 3 | """ 4 | { 5 | "architectures": [ 6 | "Qwen2ForCausalLM" 7 | ], 8 | // ... 9 | "vocab_size": 152064, 10 | 11 | // adding the following snippets 12 | "rope_scaling": { 13 | "factor": 4.0, 14 | "original_max_position_embeddings": 32768, 15 | "type": "yarn" 16 | } 17 | } 18 | """ 19 | # For details, refer to https://huggingface.co/Qwen/Qwen2-72B-Instruct. 20 | 21 | # python -m vllm.entrypoints.openai.api_server \ 22 | # --served-model-name Qwen2-72B-Instruct \ 23 | # --model "Your Checkpoint path" \ 24 | # --tensor-parallel-size=8 \ 25 | # --trust-remote-code 26 | 27 | python -m vllm.entrypoints.openai.api_server \ 28 | --served-model-name glm4-9b-1m \ 29 | --model "Your Checkpoint path" \ 30 | --tensor-parallel-size=8 \ 31 | --trust-remote-code -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # StructRAG 2 | StructRAG: Boosting Knowledge Intensive Reasoning of LLMs via Inference-time Hybrid Information Structurization 3 | 4 | https://arxiv.org/abs/2410.08815 5 | 6 | ## 0. Environment 7 | ``` 8 | python 3.8.19 9 | vllm 0.6.3.post1 10 | pip install -r requirements.txt 11 | ``` 12 | 13 | ## 1. Data Preparation 14 | ``` 15 | please follow Loong/README.md 16 | ``` 17 | 18 | ## 2. StructRAG Inference 19 | ```python 20 | # 1.
launch llm api server 21 | model_path = "/mnt/data/lizhuoqun/hf_models/Qwen2-72B-Instruct" 22 | CUDA_VISIBLE_DEVICES=0,1,2,3 && OUTLINES_CACHE_DIR=tmp && nohup python -m vllm.entrypoints.openai.api_server --model ${model_path} --served-model-name Qwen --tensor-parallel-size 4 --port 1225 --disable-custom-all-reduce > vllm.log 23 | # 2. run StructRAG 24 | python main.py --url {url_of_api_server} # output will be in ./eval_results/qwen/loong 25 | # 3. transform model output to Loong results format 26 | python do_merge_each_batch.py # results will be in ./Loong/output/qwen 27 | ``` 28 | 29 | ## 3. Results Evaluation 30 | ``` 31 | cd Loong/src && bash run.sh 32 | ``` 33 | 34 | ## 4. Router Training (optional) 35 | Qwen2-72B-Instruct has already achieved good routing performance under the few-shot examples setting. If you wish to further improve routing accuracy, we can train the 7B model using the DPO algorithm: 36 | ``` 37 | bash train_router/train.sh 38 | ``` 39 | 40 | After training, deploy the output model as an API using vllm, and obtain url_of_router.
When running StructRAG, use the following command: 41 | ``` 42 | python main.py --url {url_of_api_server} --router_url {url_of_router} 43 | ``` -------------------------------------------------------------------------------- /do_merge_each_batch.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | model_name = "qwen" 5 | git_hash = "" 6 | suffix = "" 7 | 8 | if os.path.exists(f"./Loong/output/{model_name}/loong_generate.jsonl"): 9 | raise ValueError(f"File already exists: ./Loong/output/{model_name}/loong_generate.jsonl") 10 | if os.path.exists(f"./Loong/output/{model_name}/loong_evaluate.jsonl"): 11 | raise ValueError(f"File already exists: ./Loong/output/{model_name}/loong_evaluate.jsonl") 12 | 13 | total_datas = [] 14 | 15 | dir_path = f"./eval_results{git_hash}/{model_name}/loong{suffix}" 16 | 17 | if os.path.exists(f"{dir_path}/final_output_0.jsonl"): 18 | a_s = [json.loads(line) for line in open(f"{dir_path}/final_output_0.jsonl")] 19 | print(len(a_s)) 20 | total_datas += a_s 21 | 22 | if os.path.exists(f"{dir_path}/final_output_1.jsonl"): 23 | b_s = [json.loads(line) for line in open(f"{dir_path}/final_output_1.jsonl")] 24 | print(len(b_s)) 25 | total_datas += b_s 26 | 27 | if os.path.exists(f"{dir_path}/final_output_2.jsonl"): 28 | c_s = [json.loads(line) for line in open(f"{dir_path}/final_output_2.jsonl")] 29 | print(len(c_s)) 30 | total_datas += c_s 31 | 32 | if os.path.exists(f"{dir_path}/final_output_3.jsonl"): 33 | d_s = [json.loads(line) for line in open(f"{dir_path}/final_output_3.jsonl")] 34 | print(len(d_s)) 35 | total_datas += d_s 36 | 37 | if os.path.exists(f"{dir_path}/final_output_4.jsonl"): 38 | e_s = [json.loads(line) for line in open(f"{dir_path}/final_output_4.jsonl")] 39 | print(len(e_s)) 40 | total_datas += e_s 41 | 42 | if os.path.exists(f"{dir_path}/final_output_5.jsonl"): 43 | f_s = [json.loads(line) for line in open(f"{dir_path}/final_output_5.jsonl")] 44 | 
print(len(f_s)) 45 | total_datas += f_s 46 | 47 | if os.path.exists(f"{dir_path}/final_output_6.jsonl"): 48 | g_s = [json.loads(line) for line in open(f"{dir_path}/final_output_6.jsonl")] 49 | print(len(g_s)) 50 | total_datas += g_s 51 | 52 | if os.path.exists(f"{dir_path}/final_output_7.jsonl"): 53 | h_s = [json.loads(line) for line in open(f"{dir_path}/final_output_7.jsonl")] 54 | print(len(h_s)) 55 | total_datas += h_s 56 | 57 | print("len(total_datas)", len(total_datas)) 58 | 59 | fw = open(f"./Loong/output/{model_name}/loong_generate.jsonl", "w") 60 | for t in total_datas: 61 | fw.write(json.dumps(t) + "\n") 62 | fw.close() -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import copy 4 | import time 5 | import tqdm 6 | import random 7 | random.seed(1024) 8 | import argparse 9 | 10 | from utils.qwenapi import QwenAPI 11 | 12 | from router import Router 13 | from structurizer import Structurizer 14 | from utilizer import Utilizer 15 | 16 | if __name__ == '__main__': 17 | 18 | parser = argparse.ArgumentParser() 19 | parser.add_argument("--llm_name", type=str, default="qwen") 20 | parser.add_argument("--dataset_name", type=str, default="loong") 21 | parser.add_argument("--url", type=str, default="10.32.15.63:1225") 22 | parser.add_argument("--router_url", type=str, default=None) 23 | parser.add_argument("--worker_id", type=int, choices=[0, 1, 2, 3, 4, 5, 6, 7], default=0) 24 | parser.add_argument("--start_bias", type=int, default=0) # used to manually skip last time error data 25 | parser.add_argument("--output_path_suffix", type=str, default="") 26 | args = parser.parse_args() 27 | 28 | for k, v in vars(args).items(): 29 | print(f"{k}: {v}") 30 | print('\nstart...') 31 | 32 | main_llm = QwenAPI(url=f"http://{args.url}/v1/chat/completions") 33 | if args.router_url is None: 34 | router_llm = 
QwenAPI(url=f"http://{args.url}/v1/chat/completions") 35 | else: 36 | router_llm = QwenAPI(url=f"http://{args.router_url}/v1/chat/completions") 37 | 38 | eval_data_path = "./Loong/data/loong_process.jsonl" 39 | eval_datas = [json.loads(l) for l in open(eval_data_path)] 40 | random.shuffle(eval_datas) 41 | eval_datas = eval_datas[200*args.worker_id+args.start_bias : 200*(args.worker_id+1)] 42 | print(f"len eval_datas: {len(eval_datas)}") 43 | 44 | intermediate_results_dir = f"./intermediate_results/{args.llm_name}/{args.dataset_name}{args.output_path_suffix}" 45 | os.makedirs(intermediate_results_dir) if not os.path.exists(intermediate_results_dir) else None 46 | 47 | chunk_kb_path = f"{intermediate_results_dir}/chunk_kb" 48 | graph_kb_path = f"{intermediate_results_dir}/graph_kb" 49 | table_kb_path = f"{intermediate_results_dir}/table_kb" 50 | algorithm_kb_path = f"{intermediate_results_dir}/algorithm_kb" 51 | catalogue_kb_path = f"{intermediate_results_dir}/catalogue_kb" 52 | os.makedirs(chunk_kb_path) if not os.path.exists(chunk_kb_path) else None 53 | os.makedirs(graph_kb_path) if not os.path.exists(graph_kb_path) else None 54 | os.makedirs(table_kb_path) if not os.path.exists(table_kb_path) else None 55 | os.makedirs(algorithm_kb_path) if not os.path.exists(algorithm_kb_path) else None 56 | os.makedirs(catalogue_kb_path) if not os.path.exists(catalogue_kb_path) else None 57 | 58 | output_dir = f"./eval_results/{args.llm_name}/{args.dataset_name}{args.output_path_suffix}" 59 | os.makedirs(output_dir) if not os.path.exists(output_dir) else None 60 | fw = open(f"{output_dir}/final_output_{args.worker_id}.jsonl", "a") 61 | fw_error = open(f"{output_dir}/final_output_error_{args.worker_id}.jsonl", "a") 62 | exiting_data = [json.loads(l) for l in open(f"{output_dir}/final_output_{args.worker_id}.jsonl")] 63 | exiting_data_ids = [d["id"] for d in exiting_data] 64 | 65 | router = Router(router_llm) 66 | structurizer = Structurizer(main_llm, chunk_kb_path, 
graph_kb_path, table_kb_path, algorithm_kb_path, catalogue_kb_path) 67 | utilizer = Utilizer(main_llm, chunk_kb_path, graph_kb_path, table_kb_path, algorithm_kb_path, catalogue_kb_path) 68 | 69 | for i, data in enumerate(eval_datas): # data: {"instruction": "", "question": "", "docs": "", "prompt_template": "{},{},{}"} 70 | if data["id"] in exiting_data_ids: 71 | print(f"################## Skipping {i}th data existing... ##################") 72 | continue 73 | print(f"################## Processing {i}th data... ##################") 74 | 75 | try: 76 | current_time = time.time() 77 | fw_intermediate = open(f"{intermediate_results_dir}/{data['id']}.jsonl", "w") 78 | 79 | query = data['prompt_template'].format(instruction=data['instruction'], question=data['question'], docs="......") 80 | _, titles = structurizer.split_content_and_tile(data['docs']) 81 | core_content = "The titles of the docs are: " + "\n".join(list(set(titles))) 82 | 83 | # 1. router 84 | chosen = router.do_route(query, core_content, data['id']) 85 | fw_intermediate.write(json.dumps({"query": query, "chosen": chosen}, ensure_ascii=False) + "\n") 86 | fw_intermediate.flush() 87 | 88 | # 2. structurizer 89 | instruction, kb_info = structurizer.construct(query, chosen, data['docs'], data['id']) 90 | fw_intermediate.write(json.dumps({"instruction": instruction, "kb_info": kb_info}, ensure_ascii=False) + "\n") 91 | fw_intermediate.flush() 92 | 93 | # 3. 
utilizer 94 | subqueries = utilizer.do_decompose(query, kb_info, data['id']) 95 | fw_intermediate.write(json.dumps({"subqueries": subqueries}, ensure_ascii=False) + "\n") 96 | fw_intermediate.flush() 97 | subknowledges = utilizer.do_extract(query, subqueries, chosen, data['id']) 98 | fw_intermediate.write(json.dumps({"subknowledges": subknowledges}, ensure_ascii=False) + "\n") 99 | fw_intermediate.flush() 100 | answer, _, _ = utilizer.do_merge(query, subqueries, subknowledges, chosen, data['id']) 101 | fw_intermediate.write(json.dumps({"answer": answer}, ensure_ascii=False) + "\n") 102 | fw_intermediate.flush() 103 | 104 | used_time = (time.time() - current_time) / 60 105 | print(f"level:{data['level']},set:{data['set']},type:{data['type']}") 106 | print(f"used time: {used_time:.2f} min") 107 | 108 | data['generate_response'] = answer 109 | data['used_time'] = used_time 110 | fw.write(json.dumps(data, ensure_ascii=False) + "\n") 111 | fw.flush() 112 | 113 | except Exception as e: 114 | print(f"(print in main.py) Error: {e}") 115 | data['generate_response'] = "meet error" 116 | data['used_time'] = -100 117 | fw_error.write(json.dumps(data, ensure_ascii=False) + "\n") 118 | fw_error.flush() 119 | 120 | print("all done") -------------------------------------------------------------------------------- /prompts/README.md: -------------------------------------------------------------------------------- 1 | # Prompts used in StructRAG 2 | 3 | **router.txt**: Used to guide the router to determine the optimal structure type 4 | 5 | **decompose.txt**: Used to guide the utility to decompose or rewrite the original complex problem 6 | 7 | **construct_table.txt**: Used to guide the constructor to build the original document as a table type knowledge 8 | 9 | **construct_graph.txt**: Used to guide the constructor to build the original document into graph type knowledge 10 | 11 | **construct_catalogue.txt**: Used to guide the constructor to build the original document into 
knowledge of the catalog type 12 | 13 | **constructor_algorithm.txt**: Used to guide the constructor to build the original document into algorithm type knowledge -------------------------------------------------------------------------------- /prompts/construct_algorithm.txt: -------------------------------------------------------------------------------- 1 | Instruction: 2 | Extract the required algorithmic pseudocode from Raw Content based on the requirements described in the Requirement 3 | It is required to follow the thinking method and output format in Examples, and each action in the pseudocode should be associated with specific information in the original document 4 | Note that if the requirements described in the Requirement cannot be solved in the form of pseudocode, do not forcefully write pseudocode. You can directly list the information that can solve the requirements 5 | 6 | Examples: 7 | ################# 8 | ################# 9 | Requirement: 10 | Given the document set: Intel CPU user manual, Huawei CPU user manual, Apple CPU user manual, Huawei monitor user manual, graphics card manual, host manual, fan manual, and more, extract the necessary algorithm pseudo-code for assembling a computer based on user-customized requirements. 11 | 12 | Raw Content: 13 | Intel CPU user manual: Our CPU features high performance, advanced multi-core processing, and power efficiency. 14 | Huawei CPU user manual: The Huawei CPU offers excellent thermal management and AI-powered multitasking. 15 | Apple CPU user manual: Apple CPUs are known for their efficiency in graphics rendering and seamless integration with macOS. 16 | Huawei monitor user manual: Huawei monitors offer high refresh rate (144Hz) and excellent color accuracy. 17 | Graphics card manual: The graphics card supports high resolution and fast processing for gaming. Models include: 18 | GTX 1650: Mid-range graphics card suitable for light gaming. 
19 | RTX 3060: High-performance card for gaming and content creation with ray tracing support and DLSS technology for improved performance. 20 | RTX 4090: Top-tier card for gaming at ultra settings and 4K resolution with ray tracing support and DLSS technology. 21 | Host manual: The host supports modular installation of various components with different color and material options. 22 | Options: Mid-tower, Full-tower, RGB lighting, black, white, or custom colors and materials (glass, metal, plastic). 23 | Fan manual: The fan provides optimal cooling for high-performance CPUs, available in different sizes: 24 | 120mm: Standard cooling for general use and light gaming applications with low noise levels .... 25 | 140mm: High-efficiency cooling for overclocked systems and high-performance gaming with moderate noise levels .... 26 | ....... 27 | 28 | Output: 29 | 1. Initialize components: 30 | - Intel_CPU = "Intel high performance multi-core CPU" 31 | - Huawei_CPU = "Huawei AI-powered multitasking CPU with thermal management" 32 | - Apple_CPU = "Apple efficient CPU with seamless macOS integration" 33 | - Huawei_Monitor = "Huawei 144Hz, color-accurate monitor" 34 | - Standard_Monitor = "Standard 60Hz monitor" 35 | - GTX_1650 = "GTX 1650 mid-range graphics card" 36 | - RTX_3060 = "RTX 3060 high-performance graphics card" 37 | - RTX_4090 = "RTX 4090 top-tier graphics card" 38 | - Host_Mid_Tower = "Mid-tower modular host" 39 | - Host_Full_Tower = "Full-tower modular host" 40 | - Host_RGB_Tower = "RGB-lit tower" 41 | - Fan_120mm = "120mm standard cooling fan" 42 | - Fan_140mm = "140mm high-efficiency fan" 43 | 2. 
Evaluate user requirements: 44 | if need high-performance CPU: 45 | - IF user needs high refresh rate display: 46 | - selected_CPU = Intel_CPU # According to original content, Intel CPU is high performance 47 | - selected_monitor = Huawei_Monitor # According to original content, Huawei monitor offers high refresh rate 48 | - IF user needs ultra-high graphics performance (4K gaming): # According to original content, RTX 4090 is top-tier card for 4K gaming 49 | - selected_GPU = RTX_4090 # According to original content,RTX 4090 is top-tier card for 4K gaming 50 | - install(selected_CPU) 51 | - install(selected_monitor) 52 | - install(selected_GPU) 53 | - install(Host_Full_Tower) 54 | - install(Fan_140mm) 55 | ELSE IF user needs solid gaming performance (1080p to 1440p): # According to original content, RTX 3060 is high-performance card for gaming 56 | - selected_GPU = RTX_3060 # According to original content, RTX 3060 is high-performance card for gaming 57 | - install(selected_CPU) 58 | - install(selected_monitor) 59 | - install(selected_GPU) 60 | - install(Host_Mid_Tower) 61 | - install(Fan_120mm) 62 | ELSE: 63 | - selected_GPU = GTX_1650 # According to original content, GTX 1650 is mid-range card suitable for light gaming 64 | - install(selected_CPU) 65 | - install(selected_monitor) 66 | - install(selected_GPU) 67 | - install(Host_Mid_Tower) 68 | - install(Fan_120mm) 69 | ELSE IF user prefers general high performance without high refresh rate: # According to original content, Huawei CPU offers thermal management and multitasking 70 | - selected_CPU = Intel_CPU # According to original content, Intel CPU is high performance 71 | - selected_monitor = Standard_Monitor # According to original content, standard monitor is 60Hz 72 | - selected_GPU = GTX_1650 73 | - install(selected_CPU) 74 | - install(selected_monitor) 75 | - install(selected_GPU) 76 | - install(Host_Mid_Tower) 77 | - install(Fan_120mm) 78 | ELSE IF need thermal management and multitasking CPU: # According 
to original content, Huawei CPU offers thermal management and multitasking 79 | - selected_CPU = Huawei_CPU # According to original content, Huawei CPU offers thermal management and multitasking 80 | IF user needs efficient graphics: # According to original content, RTX 3060 is high-performance card for gaming and content creation 81 | - selected_GPU = RTX_3060 # According to original content, RTX 3060 is high-performance card for gaming and content creation 82 | - install(selected_CPU) 83 | - install(selected_GPU) 84 | - install(Host_Mid_Tower) 85 | - install(Fan_140mm) 86 | ELSE: 87 | - selected_GPU = GTX_1650 # According to original content, GTX 1650 is mid-range card suitable for light gaming 88 | - install(selected_CPU) 89 | - install(selected_GPU) 90 | - install(Host_Mid_Tower) 91 | - install(Fan_120mm) 92 | ELSE IF need macOS integration and efficiency: # According to original content, Apple CPU is efficient with macOS integration 93 | - selected_CPU = Apple_CPU # According to original content, Apple CPU is efficient with macOS integration 94 | IF user needs efficient graphics rendering: # According to original content, RTX 3060 is high-performance card for gaming and content creation 95 | - selected_GPU = RTX_3060 # According to original content, RTX 3060 is high-performance card for gaming and content creation 96 | - install(selected_CPU) 97 | - install(selected_GPU) 98 | - install(Host_Mid_Tower) 99 | - install(Fan_120mm) 100 | ELSE: 101 | - selected_GPU = GTX_1650 # According to original content, GTX 1650 is mid-range card suitable for light gaming 102 | - install(selected_CPU) 103 | - install(selected_GPU) 104 | - install(Host_Mid_Tower) 105 | - install(Fan_120mm) 106 | 3. 
Evaluate aesthetics and color preferences: 107 | IF user prefers RGB lighting: # According to original content, RGB lighting is an option for the host 108 | - selected_host = Host_RGB_Tower 109 | ELSE IF user prefers a sleek design: # According to original content, black and white are color options for the host 110 | - selected_host = Host_Full_Tower 111 | ELSE: 112 | - selected_host = Host_Mid_Tower 113 | 4. Final system check: 114 | - power_on() 115 | - verify_CPU_performance() 116 | - verify_display_quality() 117 | - test_GPU_performance() 118 | - check_fan_speed_and_efficiency() 119 | ################# 120 | ################# 121 | 122 | Requirement: 123 | {requirement} 124 | 125 | Raw Content: 126 | {raw_content} 127 | 128 | Output: -------------------------------------------------------------------------------- /prompts/construct_catalogue.txt: -------------------------------------------------------------------------------- 1 | Instruction: 2 | Extract the required directory structure from Raw Content based on the requirements described in the Requirement, which is a hierarchical summary. The number of layers and the number of nodes in each layer are determined according to specific circumstances. 3 | Please follow the thinking style and output format in Examples, and note that each level of Summary needs to have a number to distinguish between different levels. And each summary needs to be very detailed. 4 | Note that you need to extract as much relevant information as possible from the Raw Content based on the entity names and person names mentioned in the Requirement, in order to build a complete directory structure. 5 | 6 | Examples: 7 | ################# 8 | ################# 9 | Requirement: 10 | Query is How do guests perceive the impact of privacy laws on technology development?, please extract relevant catalogues from the document based on the Query. 
11 | 12 | Raw Content: 13 | Episode 48 - Randall Munroe 14 | RANDALL MUNROE: Then fell into doing comics where I can spend all day diving into some rabbit hole, and then draw comics about it. And then the next day, move on to a different thing. I found a way to grab all the candy in the candy store. 15 | KEVIN SCOTT: Hi, everyone. Welcome to Behind the Tech. I’m your host, Kevin Scott, Chief Technology Officer for Microsoft. 16 | In this podcast, we’re going to get behind the tech. We’ll talk with some of the people who have made our modern tech world possible and understand what motivated them to create what they did. So, join me to maybe learn a little bit about the history of computing and get a few behind-the-scenes insights into what’s happening today. Stick around. 17 | CHRISTINA WARREN: Hello, and welcome to Behind the Tech. I’m Christina Warren, Senior Developer Advocate at GitHub. 18 | KEVIN SCOTT: And I’m Kevin Scott. 19 | CHRISTINA WARREN: And today, we have a super exciting guest with us, Randall Munroe. He’s famous for creating the webcomic xkcd 20 | KEVIN SCOTT: Yeah, it’s – Randall is maybe my favorite cartoonist. So I very rarely post cartoons onto social media; 100% of them are Randall’s stuff. 21 | CHRISTINA WARREN: 100%. 22 | KEVIN SCOTT: And so, once a month, once every other month, like he’ll write something that I just think is so fabulously funny that I have to share it with my other nerd friends. 23 | CHRISTINA WARREN: No, I mean, well – well that – well, that’s what makes the comic so good is – and – and you know, I’m interested to hear what you two talk about. But it’s such a – it covers such a wide spectrum of – of nerd-adjacent topics. And – and because the comic’s been going on so long, they’re literally – it’s like The Simpsons There literally is one for everything. You know, like there’s – there’s an xkcd that you can apply to any situation. 24 | ....... 25 | ....... 26 | ....... 
27 | 28 | Output: 29 | In question in requirement, the main topic is about the impact of privacy laws on technology development. Thus we extract "impact of privacy laws on technology development" related information from the raw content. And constructed a hierarchical summary based on the extracted information. 30 | 1. First-Level Summary 1: AI Technology and Regulatory Challenges 31 | • The podcast explores the complex relationship between AI advancements and existing legal frameworks, with a particular focus on privacy laws like HIPAA and how they interact with technological innovation. 32 | (1) Second-Level Summary 1: Regulatory Concerns in Financial Services 33 | • Ethan Mollick highlights concerns that the current regulatory environment in financial services is not well-suited to address the unique challenges posed by AI, particularly the uncertainty surrounding the applicability of existing regulations. 34 | • (a) Third-Level Summary 1: Innovation Hindered by Regulatory Ambiguity 35 | • Mollick discusses how the lack of clarity in regulations impedes the ability of industries, like finance, to fully harness the potential of AI technologies. 36 | • (b) Third-Level Summary 1: Need for Adaptive Regulations 37 | • He advocates for a more dynamic and responsive regulatory framework that can evolve alongside technological advancements, ensuring both safety and innovation. 38 | (2) Second-Level Summary 2: AI in Healthcare and Privacy Concerns 39 | • The podcast also delves into the intersection of AI experimentation in healthcare and the need to comply with privacy regulations like HIPAA. 40 | • (a) Third-Level Summary 2: Balancing Privacy and AI Benefits 41 | • Discussions emphasize the challenge of ensuring privacy while leveraging AI to improve healthcare systems and access to medical services. 
42 | • (b) Third-Level Summary 2: Ethical Considerations in AI Use 43 | • Mollick touches on concerns over AI misuse, such as “data rape,” and underscores the importance of regulating AI to promote positive outcomes while preventing harmful practices. 44 | 2. First-Level Summary 2: The Call for Responsive AI Regulation 45 | • Mollick and other guests advocate for a regulatory approach that allows for experimentation and innovation, particularly in areas like healthcare, while mitigating potential risks. 46 | (1) Second-Level Summary 1: The Need for Smart and Responsive Regulation 47 | • Mollick calls for a “fast, smart, responsive regulation” that monitors emerging harms in AI and carves out space for experimentation in critical sectors like medicine. 48 | • (a) Third-Level Summary 1: Evolving with Technological Advancements 49 | • He stresses that regulations must evolve as quickly as the technology itself to ensure they are effective in addressing both the opportunities and risks associated with AI. 50 | (2) Second-Level Summary 2: AI as a General-Purpose Technology 51 | • The conversation highlights the far-reaching implications of AI, recognizing it as a general-purpose technology with the potential to significantly impact various sectors. 52 | • (a) Third-Level Summary 2: Promoting Innovation While Protecting Rights 53 | • Experts argue that while privacy laws are crucial to prevent misuse, they must also be flexible enough to allow for innovation, ensuring AI’s positive potential is not stifled. 54 | • (b) Third-Level Summary 2: The Need for Balance 55 | • The guests suggest that a balanced approach to regulation is necessary, one that promotes innovation while protecting individual rights and societal interests. 56 | 3. 
First-Level Summary 3: Conclusion on the Future of AI Regulation 57 | • The episode concludes with a call for a balanced regulatory framework that can adapt to the evolving nature of AI, ensuring that both privacy and innovation are protected. 58 | (1) Second-Level Summary 1: Regulatory Agility for AI’s Future 59 | • Experts emphasize that regulations must be agile enough to keep pace with AI developments, ensuring that the technology can be used safely while minimizing potential harms. 60 | • (a) Third-Level Summary 1: Agility in Regulation 61 | • The need for regulatory frameworks that evolve in tandem with technological advancements is underscored as a key factor in supporting AI’s positive societal impact. 62 | ################# 63 | ################# 64 | 65 | Requirement: 66 | {requirement} 67 | 68 | Raw Content: 69 | {raw_content} 70 | 71 | Output: -------------------------------------------------------------------------------- /prompts/construct_graph.txt: -------------------------------------------------------------------------------- 1 | Instruction: 2 | Extract the required triplets from Raw Content according to the requirements described in the Requirement 3 | The output of a triplet is in the format of {{'head': '...', 'relation': '...', 'tail': ['...', '...']}}. 4 | Note that not all triples in the text need to be extracted. You need to analyze the relationships and entities mentioned in the Requirement and only extract the relevant triples 5 | Note that the head and tail you output should be kept as complete as possible. They may not be just a word or phrase, but can also be a sentence or a paragraph of text. Try to be consistent with the original text and do not make any abbreviations. 
6 | 7 | Examples: 8 | ################# 9 | ################# 10 | Requirement: 11 | It is necessary to construct a graph based on a given document, where the entity is the title of the paper, the relationship is a reference, and the title of the given document is used as the head, while the titles of other papers are used as the tail 12 | 13 | Noting: 14 | You only need to consider the following paper titles, 15 | Generative AI and Large Language Models for Cyber Security: All Insights You Need 16 | WHEN LLMs MEET CYberSECURITY: A SYStEMATIC LITERATURE REVIEW 17 | Can Large Language Models Be an Alternative to Human Evaluations? 18 | LLM4Vuln: A Unified Evaluation Framework for Decoupling and Enhancing LLMs' Vulnerability Reasoning 19 | Why Can GPT Learn In-Context? Language Models Implicitly Perform Gradient Descent as Meta-Optimizers 20 | 21 | Raw Content: 22 | # Generative AI and Large Language Models for Cyber Security: All Insights You Need 23 | Mohamed Amine Ferrag, Fatima Alwahedi, Ammar Battah, Bilel Cherif, Abdechakour Mechri,
and Norbert Tihanyi 24 | #### Abstract 25 | The rapid evolution of cyber threats requires innovative approaches to enhance cybersecurity defenses. In this paper, 26 | Index Terms-Generative AI, LLM, Transformer, Security, Cyber Security. 27 | M. A. Ferrag is the corresponding author. 28 | ## LIST OF ABBREVIATIONS 29 | AI Artificial Intelligence 30 | ## I. INTRODUCTION 31 | The history of Natural Language Processing (NLP) dates back to the 1950s when the Turing test was developed. However, NLP has seen significant advancements in 32 | [141] ZySec-AI, "Zysec-ai: Project zysec," Webpage, accessed: 2024-05-01. [Online]. Available: https://github.com/ZySec-AI/project-zysec 33 | [205] M. Bhatt, S. Chennabasappa, C. Nikolaidis, S. Wan, I. Evtimov, D. Gabi, D. Song, F. Ahmad, C. Aschermann, L. Fontana et al., "Purple llama cyberseceval: A secure coding benchmark for language models," arXiv preprint arXiv:2312.04724, 2023. 34 | [206] Z. Liu, "Secqa: A concise question-answering dataset for evaluating large language models in computer security," arXiv preprint arXiv:2312.15838, 2023. 35 | [207] M. Bhatt, S. Chennabasappa, Y. Li, C. Nikolaidis, D. Song, S. Wan, F. Ahmad, C. Aschermann, Y. Chen, D. Kapil, D. Molnar, S. Whitman, and J. Saxe, "Cyberseceval 2: A wide-ranging cybersecurity evaluation suite for large language models," 2024. 36 | [208] N. Li, A. Pan, A. Gopal, S. Yue, D. Berrios, A. Gatti, J. D. Li, A.K. Dombrowski, S. Goel, L. Phan et al., "The wmdp benchmark: Measuring and reducing malicious use with unlearning," arXiv preprint arXiv:2403.03218, 2024. 37 | [209] Y. Sun, D. Wu, Y. Xue, H. Liu, W. Ma, L. Zhang, M. Shi, and Y. Liu, "Llm4vuln: A unified evaluation framework for decoupling and enhancing llms\' vulnerability reasoning," 2024. 38 | [210] Z. Liu, J. Shi, and J. F. Buford, "Cyberbench: A multi-task benchmark for evaluating large language models in cybersecurity." [Online]. 
Available: http://aics.site/AICS2024/AICS_CyberBench.pdf 39 | 40 | Output: 41 | Among the paper titles that need to be considered, "Generative AI and Large Language Models for Cyber Security: All Insights You Need" is the title of the given document, so it should be used as the head. Among the other paper titles that need to be considered, "Llm4vuln: A unified evaluation framework for decoupling and enhancing llms \'vulnerability reasoning" appears in the reference of the given document, so it should be used as the tail. The remaining paper titles that need to be considered do not appear in the given document, so they are not considered. 42 | {{"head": "Generative AI and Large Language Models for Cyber Security: All Insights You Need", "relation": "reference", "tail": ["Llm4vuln: A unified evaluation framework for decoupling and enhancing llms\' vulnerability reasoning"]}} 43 | ################# 44 | ################# 45 | 46 | Requirement: 47 | It is necessary to construct a graph based on a given document, where the entity is the title of the paper, the relationship is a reference, and the title of the given document is used as the head, while the titles of other papers are used as the tail 48 | 49 | Noting: 50 | You only need to consider the following paper titles, 51 | Generative AI and Large Language Models for Cyber Security: All Insights You Need 52 | WHEN LLMs MEET CYberSECURITY: A SYStEMATIC LITERATURE REVIEW 53 | Can Large Language Models Be an Alternative to Human Evaluations? 54 | LLM4Vuln: A Unified Evaluation Framework for Decoupling and Enhancing LLMs' Vulnerability Reasoning 55 | Why Can GPT Learn In-Context? Language Models Implicitly Perform Gradient Descent as Meta-Optimizers 56 | 57 | Raw Content: 58 | # LLM4Vuln: A Unified Evaluation Framework for Decoupling and Enhancing LLMs\' Vulnerability Reasoning 59 | Daoyuan $\\mathrm{{Wu}}^{{*}}$
Nanyang Technological University
Singapore, Singapore
daoyuan.wu@ntu.edu.sg
Wei Ma
Nanyang Technological University
Singapore, Singapore
ma_wei@ntu.edu.sg 60 | Yue Xue
MetaTrust Labs
Singapore, Singapore
xueyue@metatrust.io
Lyuye Zhang
Nanyang Technological University
Singapore, Singapore
zh0004ye@e.ntu.edu.sg 61 | Miaolei Shi
MetaTrust Labs
Singapore, Singapore
stan@metatrust.io 62 | Yang Liu
Nanyang Technological University
Singapore, Singapore
yangliu@ntu.edu.sg 63 | #### Absract 64 | Large language models (LLMs) have demonstrated significant potential for many downstream tasks, including those requiring humanlevel intelligence, such as vulnerability detection. However, recent attempts to use LLMs for vulnerability detection are still preliminary, as they lack an in-depth understanding of a subject LLM\'s vulnerability reasoning capability - whether it originates from the model itself or from external assistance, such as invoking tool support and retrieving vulnerability knowledge. 65 | ## REFERENCES 66 | [1] 2023. Ethereum Whitepaper. https://ethereum.org/whitepaper 67 | [2] 2023. Solidity Programming Language. https://soliditylang.org 68 | [21] Yizheng Chen, Zhoujie Ding, Lamya Alowain, Xinyun Chen, and David Wagner 2023. DiverseVul: A New Vulnerable Source Code Dataset for Deep Learning Based Vulnerability Detection. In Proceedings of the 26th International Symposium on Research in Attacks, Intrusions and Defenses. ACM, Hong Kong China, 654-668. https://doi.org/10.1145/3607199.3607242 69 | [22] Cheng-Han Chiang and Hung-yi Lee. 2023. Can Large Language Models Be an Alternative to Human Evaluations?. In Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), Anna Rogers, Jordan Boyd-Graber, and Naoaki Okazaki (Eds.). Association for Computational Linguistics, Toronto, Canada, 15607-15631. https://doi.org/10.18653/v1/2023.acllong. 870 70 | [23] Damai Dai, Yutao Sun, Li Dong, Yaru Hao, Shuming Ma, Zhifang Sui, and Furu Wei 2023. Why Can GPT Learn In-Context? Language Models Implicitly Perform Gradient Descent as Meta-Optimizers. arXiv:2212.10559 (May 2023). https //doi.org/10.48550/arXiv.2212.10559 arXiv:2212.10559 [cs]. 
71 | 72 | Output: 73 | Among the paper titles that need to be considered, 'LLM4Vuln: A Unified Evaluation Framework for Decoupling and Enhancing LLMs' Vulnerability Reasoning' is the title of the given document, so it should be used as the head. Among the other paper titles that need to be considered, 'Why Can GPT Learn In Context?'? Language Models Implicitly Perform Gradient Descent as Meta Optizers "and" Can Large Language Models Be an Alternative to Human Evaluations? "Appear in the references of the given document, so they should be considered as tails. The remaining paper titles that need to be considered are not included in the given document, so they are not considered. 74 | {{"head": "LLM4Vuln: A Unified Evaluation Framework for Decoupling and Enhancing LLMs\' Vulnerability Reasoning", "relation": "reference", "tail": ["Why Can GPT Learn In-Context? Language Models Implicitly Perform Gradient Descent as Meta-Optimizers.", "Can Large Language Models Be an Alternative to Human Evaluations?"]}} 75 | ################# 76 | ################# 77 | 78 | Requirement: 79 | {requirement} 80 | 81 | Noting: 82 | You only need to consider the following paper titles, 83 | {titles} 84 | 85 | Raw Content: 86 | {raw_content} 87 | 88 | Output: -------------------------------------------------------------------------------- /prompts/construct_table.txt: -------------------------------------------------------------------------------- 1 | Instruction: 2 | Extract complete relevant tables from Raw Content based on the requirements described in the Requirement. 3 | Note that when building a table, it is important to retain the table title and source information, such as which company and report the table comes from. 4 | 5 | Hints: 6 | Firstly, identify the keywords in the Requirement, including entity names and attribute names, and then extract them from the Raw Content based on these keywords. 
7 | If the Raw Content does not contain the information required by the Requirement, then extract the small amount of information most relevant to the Requirement from the Raw Content 8 | 3. When analyzing Requirements and extracting Raw Content, do not translate; maintain the original language 9 | 10 | Raw Content: 11 | {content} 12 | 13 | Requirement: 14 | {instruction} 15 | 16 | Output: -------------------------------------------------------------------------------- /prompts/decompose.txt: -------------------------------------------------------------------------------- 1 | Instruction: 2 | In order to solve complex document-related problems, you need to break down the given Query into multiple relatively simple and independent sub-problems. 3 | 4 | Requirement: 5 | 1. Doc Info is a description of the document information. You can reference this information to implement the breakdown. 6 | 2. If the given problem is already simple enough or there is no way to break it down, then no breakdown is needed. 7 | 3. Follow the output format and thought process in the Examples, and do not provide any additional explanatory notes. 8 | 9 | Examples: 10 | ################# 11 | ################# 12 | Doc Info: 13 | The titles of the docs are: "Judgment Document 7"\n"Judgment Document 3"\n"Judgment Document 2"\n"Judgment Document 4"\n"Judgment Document 6"\n"Judgment Document 8"\n"Judgment Document 5"\n"Judgment Document 1" 14 | 15 | Query: 16 | ... Please read the above judgment documents and classify all the judgment documents according to the following 6 types of case causes: 'Property Dispute', 'Administrative Entity - Labor and Social Security Administration (Labor, Social Security)', 'Cause for Execution - Administrative Non-Litigation Execution', 'Corruption and Bribery', 'Cause for Execution - Other Causes', and 'Administrative Action - Administrative Payment'. 
You only need to output the title of each judgment document, following the format below with titles as per the judgment documents provided:\n{{'Property Dispute': ['Judgment Document a', 'Judgment Document b'], 'Administrative Entity - Labor and Social Security Administration (Labor, Social Security)': ['Judgment Document a', 'Judgment Document b'], 'Cause for Execution - Administrative Non-Litigation Execution': ['Judgment Document a', 'Judgment Document b'], 'Corruption and Bribery': ['Judgment Document a', 'Judgment Document b'], 'Cause for Execution - Other Causes': ['Judgment Document a', 'Judgment Document b'], 'Administrative Action - Administrative Payment': ['Judgment Document a', 'Judgment Document b']}}\nYour answer is: 17 | 18 | Output: 19 | determine whether the cause of each given judgment document is 'Property Dispute', 20 | determine whether the cause of each given judgment document is 'Administrative Entity - Labor and Social Security Administration (Labor, Social Security)', 21 | determine whether the cause of each given judgment document is 'Cause for Execution - Administrative Non-Litigation Execution', 22 | determine whether the cause of each given judgment document is 'Corruption and Bribery', 23 | determine whether the cause of each given judgment document is 'Cause for Execution - Other Causes', 24 | determine whether the cause of each given judgment document is 'Administrative Action - Administrative Payment'. 25 | 26 | ################# 27 | Doc Info: 28 | The titles of the docs are: "Judgment Document 7"\n"Judgment Document 3"\n"Judgment Document 1"\n"Judgment Document 2"\n"Judgment Document 4"\n"Judgment Document 5"\n\n"Judgment Document 6" 29 | 30 | Query: 31 | ...... Based solely on the judgment documents seen above, answer the following question.\n\nRead the above judgment documents, and I will provide you with several judgment results: {{\'Judgment Result 1\': \'1. 
Uphold the decision of the Huainan Xiejiaji District People's Court of Anhui Province 20....\', \'Judgment Result 2\': \'Dismiss the appeal and uphold the original judgment.\\nThe second-instance case acceptance fee is 13,800 yuan,....', \'Judgment Result 3\': \'Uphold the Civil Judgement No. 4850 (2020) of the Tianjin Third Intermediate People's Court.\\nThis judgment is final.\', \'Judgment Result 4\': \'Dismiss the appeal and uphold the original judgment.\\nThe second-instance case acceptance fee is 50 yuan, paid by the appellant Li (prepaid by the appellant).\\nThis judgment is final.\'}}.\nYou need to determine which judgment result is most likely for each of the above judgment documents. Please output in the following JSON format:\n{{"Judgment Document 1":"Judgment Result a", "Judgment Document 2":"Judgment Result b", "Judgment Document 3":"Judgment Result c"}}\nOnly provide the judgment document titles and the judgment result numbers, not the specific content.\nPlease directly provide the answer: 32 | 33 | Output: 34 | I will provide you with several judgment results: {{\'Judgment Result 1\': \'1. Uphold the decision of the Huainan Xiejiaji District People's Court of Anhui Province 20....\', \'Judgment Result 2\': \'Dismiss the appeal and uphold the original judgment.\\nThe second-instance case acceptance fee is 13,800 yuan,....', \'Judgment Result 3\': \'Uphold the Civil Judgement No. 4850 (2020) of the Tianjin Third Intermediate People's Court.\\nThis judgment is final.\', \'Judgment Result 4\': \'Dismiss the appeal and uphold the original judgment.\\nThe second-instance case acceptance fee is 50 yuan, paid by the appellant Li (prepaid by the appellant).\\nThis judgment is final.\'}}.\nYou need to determine which judgment result is most likely for the given judgment document. 35 | 36 | ################# 37 | Doc Info: 38 | The titles of the docs are: "2020 Report"\n"2025 Fourth Quarter Report" 39 | 40 | Query: 41 | ... 
Please answer which company has the highest 'Total Owner's Equity'? 42 | 43 | Output: 44 | Identify the company with the highest 'Total Owner's Equity'. 45 | 46 | ################# 47 | Doc Info: 48 | The titles of the docs are: "WHEN LLMs MEET CYberSECURITY: A SYStEMATIC LITERATURE REVIEW"\n"Generative AI and Large Language Models for Cyber Security: All Insights You Need"\n"LLM4Vuln: A Unified Evaluation Framework for Decoupling and Enhancing LLMs' Vulnerability Reasoning" 49 | 50 | Query: 51 | ...Paper: OpenMoE: An Early Effort on Open Mixture-of-Experts Language Models. "instruction": We hope you will carefully study the provided papers and determine the citation relationships between them...... 52 | 53 | Output: 54 | Find the required triples with entities as the given papers and the relationship as citation. 55 | 56 | ################# 57 | ################# 58 | 59 | Doc Info: 60 | {kb_info} 61 | 62 | Query: 63 | {query} 64 | 65 | Output: -------------------------------------------------------------------------------- /prompts/route.txt: -------------------------------------------------------------------------------- 1 | Instruction: 2 | To generate answers to questions based on documents, I need to structure the documents as a table, graph, or chunk. 3 | Generally speaking, statistical questions prefer tables, chain reasoning questions prefer graphs, and single-hop QA questions prefer chunks. 4 | Now, given the following document information and question, please determine which type of knowledge structure I should use. 5 | Simply output one of the three words: table, graph, or chunk, without providing any further explanation. 6 | 7 | Examples: 8 | ================= 9 | ================= 10 | Doc Info: 11 | "2024 Financial Report" 12 | 13 | Query: 14 | ......What is the amount of Mango Excellent Media's trading financial assets? 
Please read the financial statements of the above-mentioned companies and answer the following questions based only on the content seen above. You can ignore parts related to financial reports that are not mentioned in the questions and only answer the other parts. 15 | 16 | Output: 17 | table 18 | 19 | ================= 20 | Doc Info: 21 | "Judgment Document 1" "Judgment Document 2" "Judgment Document 3" "Judgment Document 4" "Judgment Document 5" 22 | 23 | Query: 24 | ......Which judgment document among the above has the cause of action as 'Administrative Action - Administrative Registration'? Please answer the question based only on the judgment documents seen above, and simply provide the title of the judgment document that meets the criteria. Based solely on the content seen in the judgment documents, I will give you several judgment results: {{'Judgment Result 1': '1. Uphold the decision of the Xiejiaji District People’s Court of Huainan City, Anhui Province (20....', 'Judgment Result 2': 'Dismiss the appeal and uphold the original judgment.\nThe second-instance case acceptance fee is 13,800 yuan,....', 'Judgment Result 3': 'Uphold the civil judgment of the Third Intermediate People’s Court of Tianjin (2020) Jin 03 Min Zhong 4850 No.\nThis judgment is final.', 'Judgment Result 4': 'Dismiss the appeal and uphold the original judgment.\nThe second-instance case acceptance fee is 50 yuan, to be borne by the appellant Li (the appellant has prepaid).\nThis judgment is final.'}}. You need to determine which judgment result is the most likely for all the above judgment documents. Please output in the following JSON format: {{"Judgment Document 1": "Judgment Result a", "Judgment Document 2": "Judgment Result b", "Judgment Document 3": "Judgment Result c"}} Only provide the judgment document titles and judgment result numbers; specific content output is not required. 
25 | 26 | Output: 27 | chunk 28 | 29 | ================= 30 | Doc Info: 31 | Patchscopes: A Unifying Framework for Inspecting Hidden Representations of Language Models\n Llama: Efficient Permutation Importance Sampling\n Vicuna: Visual Contextualization and Navigation for Large-scale Codebases 32 | 33 | Query: 34 | '#Papers:\n......\n\nWe hope you will carefully study the provided papers and determine the citation relationships between them. Please follow the instructions below strictly to complete the task:\n\n#Specific Requirements:\n1. Reference: When a given paper mentions other provided papers, those other papers are considered as "references" for the given paper. To summarize in this specific context, references are about what the given paper is using.\n2. Citation: Conversely, when other provided papers mention the given paper in their works, the given paper is being "cited" by those other papers. To summarize in this specific context, citations are about who is using the given paper.\n3. Given a paper, you need to determine the citation or reference relationship between this paper and the other papers. Do not consider papers that are not provided.\n3. Please present the paper titles in a json format as follows: {{"Reference":["Reference Title 1", "Reference Title 2", ..., "Reference Title n"], "Citation":["Citation Title 1", "Citation Title 2", ..., "Citation Title n"]}}.\n4. 
If a paper does not have any references or citations, please leave the corresponding list empty, e.g.{{"Refernce":[]}}, {{"Citation":[]}}.\n\n#The paper you need to analyze:\nPatchscopes: A Unifying Framework for Inspecting Hidden Representations of Language Models' 35 | 36 | Output: 37 | graph 38 | ================= 39 | ================= 40 | 41 | Doc Info: 42 | {titles} 43 | 44 | Query: 45 | {query} 46 | 47 | Output: -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate==0.31.0 2 | aiohttp==3.9.5 3 | aiosignal==1.3.1 4 | annotated-types==0.7.0 5 | anthropic==0.30.1 6 | anyio==4.4.0 7 | async-timeout==4.0.3 8 | attrs==23.2.0 9 | beautifulsoup4==4.12.3 10 | certifi==2024.6.2 11 | charset-normalizer==3.3.2 12 | click==8.1.7 13 | colorlog==6.8.2 14 | dashscope==1.20.1 15 | datasets==2.21.0 16 | deepspeed==0.15.0 17 | dill==0.3.8 18 | distro==1.9.0 19 | docker-pycreds==0.4.0 20 | docstring_parser==0.16 21 | eval_type_backport==0.2.0 22 | exceptiongroup==1.2.1 23 | faiss-gpu==1.7.2 24 | filelock==3.15.4 25 | frozenlist==1.4.1 26 | fsspec==2024.5.0 27 | gitdb==4.0.11 28 | GitPython==3.1.43 29 | google==3.0.0 30 | gritlm==1.0.0 31 | h11==0.14.0 32 | hjson==3.1.0 33 | httpcore==1.0.5 34 | httpx==0.27.0 35 | huggingface-hub==0.23.4 36 | idna==3.7 37 | Jinja2==3.1.4 38 | jiter==0.5.0 39 | joblib==1.4.2 40 | jsonlines==4.0.0 41 | logger==1.4 42 | markdown-it-py==3.0.0 43 | MarkupSafe==2.1.5 44 | mdurl==0.1.2 45 | modelscope==1.20.1 46 | mpmath==1.3.0 47 | mteb==1.12.54 48 | multidict==6.0.5 49 | multiprocess==0.70.16 50 | networkx==3.1 51 | ninja==1.11.1.1 52 | nltk==3.8.1 53 | numpy==1.24.4 54 | nvidia-cublas-cu12==12.1.3.1 55 | nvidia-cuda-cupti-cu12==12.1.105 56 | nvidia-cuda-nvrtc-cu12==12.1.105 57 | nvidia-cuda-runtime-cu12==12.1.105 58 | nvidia-cudnn-cu12==8.9.2.26 59 | nvidia-cufft-cu12==11.0.2.54 60 | 
class Router:
    """Routes a query to the most suitable knowledge-structure type."""

    # Candidate structure types, checked in the same priority order as the
    # original if/elif chain (first match wins).
    _CANDIDATES = ("table", "graph", "algorithm", "catalogue")

    def __init__(self, llm):
        # llm: any object exposing `response(prompt) -> str`.
        self.llm = llm

    def do_route(self, query, core_content, data_id):
        """Ask the LLM which structure fits `query`; default to 'chunk'.

        Args:
            query: the user query to route.
            core_content: document titles/summary inserted into the prompt.
            data_id: identifier used only for progress logging.

        Returns:
            One of 'table', 'graph', 'algorithm', 'catalogue', 'chunk'.
        """
        print(f"data_id: {data_id}, do_route...")

        # Context manager closes the prompt file promptly; the original
        # `open(...).read()` left the handle to the garbage collector.
        with open("prompts/route.txt", "r") as f:
            raw_prompt = f.read()

        prompt = raw_prompt.format(
            query=query,
            titles=core_content
        )
        # Normalize once instead of calling .lower() in every branch.
        output = self.llm.response(prompt).lower()

        # First keyword found in the LLM output wins; plain chunks otherwise.
        return next((c for c in self._CANDIDATES if c in output), "chunk")
class Structurizer:
    """Builds a typed knowledge base (graph / table / algorithm / catalogue /
    chunk) from raw concatenated documents and persists each KB as a JSON
    shard keyed by data_id."""

    def __init__(self, llm, chunk_kb_path, graph_kb_path, table_kb_path, algorithm_kb_path, catalogue_kb_path):
        # llm: any object exposing `response(prompt) -> str`.
        # *_kb_path: directories where per-data_id JSON shards are written.
        self.llm = llm
        self.chunk_kb_path = chunk_kb_path
        self.graph_kb_path = graph_kb_path
        self.table_kb_path = table_kb_path
        self.algorithm_kb_path = algorithm_kb_path
        self.catalogue_kb_path = catalogue_kb_path

    @staticmethod
    def _read_prompt(path):
        # Read a prompt template; the original `open(...).read()` never
        # closed the file handle.
        with open(path, "r") as f:
            return f.read()

    @staticmethod
    def _dump_json(path, data):
        # Persist one KB shard; the original `json.dump(data, open(path, "w"))`
        # also leaked the handle.
        with open(path, "w") as f:
            json.dump(data, f, ensure_ascii=False, indent=4)

    def construct(self, query, chosen, docs, data_id):
        """Dispatch to the builder selected by `chosen`.

        Returns:
            (instruction, info) — the instruction handed to the builder and a
            short textual summary of the constructed KB.
        Raises:
            ValueError: for an unknown `chosen` value.
        """
        print(f"data_id: {data_id}, construct...")

        if chosen == "graph":
            instruction = f"Based on the given document, construct a graph where entities are the titles of papers and the relation is 'reference', using the given document title as the head and other paper titles as tails."
            info_of_graph = self.do_construct_graph(instruction, docs, data_id)
            return instruction, info_of_graph
        elif chosen == "table":
            instruction = f"Query is {query}, please extract relevant complete tables from the document based on the attributes and keywords mentioned in the Query. Note: retain table titles and source information."
            info_of_table = self.do_construct_table(instruction, docs, data_id)
            return instruction, info_of_table
        elif chosen == "algorithm":
            instruction = f"Query is {query}, please extract relevant algorithms from the document based on the Query."
            info_of_algorithm = self.do_construct_algorithm(instruction, docs, data_id)
            return instruction, info_of_algorithm
        elif chosen == "catalogue":
            instruction = f"Query is {query}, please extract relevant catalogues from the document based on the Query."
            info_of_catalogue = self.do_construct_catalogue(instruction, docs, data_id)
            return instruction, info_of_catalogue
        elif chosen == "chunk":
            instruction = "construct chunk"
            info_of_chunk = self.do_construct_chunk(instruction, docs, data_id)
            return instruction, info_of_chunk
        else:
            raise ValueError("chosen should be in ['graph', 'table', 'algorithm', 'catalogue', 'chunk']")

    def do_construct_graph(self, instruction, docs, data_id):
        """Build one reference graph per document; return a short summary."""
        print(f"data_id: {data_id}, do_construct_graph...")
        docs, titles = self.split_content_and_tile(docs)

        graphs = []
        info_of_graph = ""
        raw_prompt = self._read_prompt("prompts/construct_graph.txt")
        for d, doc in enumerate(docs):
            print(f"data_id: {data_id}, do_construct_graph... in doc {d}/{len(docs)} in docs ..")
            title = doc['title']
            content = doc['document']

            prompt = raw_prompt.format(
                requirement=instruction,
                raw_content=content,
                titles="\n".join(titles)
            )
            output = self.llm.response(prompt)
            # Summary keeps only the first output line, capped at 128 chars.
            info_of_graph += output.split("\n")[0][:128]
            graphs.append(f"{title}: {output}")

        self._dump_json(f"{self.graph_kb_path}/data_{data_id}.json", graphs)

        return info_of_graph

    def do_construct_table(self, instruction, docs, data_id):
        """Extract query-relevant tables per document; return a short summary."""
        print(f"data_id: {data_id}, do_construct_table...")
        docs, titles = self.split_content_and_tile(docs)

        tables = []
        info_of_table = ""
        raw_prompt = self._read_prompt("prompts/construct_table.txt")
        for d, doc in enumerate(docs):
            print(f"data_id: {data_id}, do_construct_table... in doc {d}/{len(docs)} in docs ..")
            title = doc['title']
            content = doc['document']
            prompt = raw_prompt.format(
                instruction=instruction,
                content=content
            )
            output = self.llm.response(prompt)
            info_of_table += output.split("\n")[0][:128]
            tables.append(f"{title}: {output}")

        self._dump_json(f"{self.table_kb_path}/data_{data_id}.json", tables)

        return info_of_table

    def do_construct_chunk(self, instruction, docs, data_id):
        """Store each document verbatim as a chunk; return the joined titles."""
        print(f"data_id: {data_id}, do_construct_chunk...")
        docs, titles = self.split_content_and_tile(docs)

        # No LLM call here: chunks are the raw documents prefixed by title.
        chunks = [f"{doc['title']}: {doc['document']}" for doc in docs]

        self._dump_json(f"{self.chunk_kb_path}/data_{data_id}.json", chunks)

        info_of_chunk = " ".join(titles)
        return info_of_chunk

    def do_construct_algorithm(self, instruction, docs, data_id):
        """Extract query-relevant algorithms per document; return a summary."""
        print(f"data_id: {data_id}, do_construct_algorithm...")
        docs, titles = self.split_content_and_tile(docs)

        algorithms = []
        info_of_algorithm = ""
        raw_prompt = self._read_prompt("prompts/construct_algorithm.txt")
        for d, doc in enumerate(docs):
            print(f"data_id: {data_id}, do_construct_algorithm... in doc {d}/{len(docs)} in docs ..")
            title = doc['title']
            content = doc['document']
            prompt = raw_prompt.format(
                requirement=instruction,
                raw_content=content
            )
            output = self.llm.response(prompt)
            info_of_algorithm += output.split("\n")[0][:128]
            algorithms.append(f"{title}: {output}")

        self._dump_json(f"{self.algorithm_kb_path}/data_{data_id}.json", algorithms)

        return info_of_algorithm

    def do_construct_catalogue(self, instruction, docs, data_id):
        """Extract catalogues per document; return a short summary.

        Bug fix: the original ran `instruction.split("Query:\\n")[1]`
        unconditionally, but construct() builds instructions that contain no
        "Query:\\n" marker, so the index always raised IndexError. The prefix
        is now stripped only when the marker is actually present.
        """
        print(f"data_id: {data_id}, do_construct_catalogue...")
        docs, titles = self.split_content_and_tile(docs)

        if "Query:\n" in instruction:
            instruction = instruction.split("Query:\n")[1]

        catalogues = []
        info_of_catalogue = ""
        raw_prompt = self._read_prompt("prompts/construct_catalogue.txt")
        for d, doc in enumerate(docs):
            print(f"data_id: {data_id}, do_construct_catalogue... in doc {d}/{len(docs)} in docs ..")
            title = doc['title']
            document = doc['document']

            # Placeholder for future chunking of very long documents; today
            # the whole document is processed as a single piece of content.
            # (The unused `len_document` local was removed.)
            contents = [document]

            for c, content in enumerate(contents):
                print(f"data_id: {data_id}, do_construct_catalogue... in doc {d}/{len(docs)} in docs .. in content {c}/{len(contents)} in contents ..")
                prompt = raw_prompt.format(
                    requirement=instruction,
                    raw_content=content
                )
                output = self.llm.response(prompt)
                info_of_catalogue += output.split("\n")[0][:128]
                catalogues.append(f"\n\n{title}: {output}")

        self._dump_json(f"{self.catalogue_kb_path}/data_{data_id}.json", catalogues)

        return info_of_catalogue

    def split_content_and_tile(self, docs_):
        """Split a concatenated document string into per-doc dicts and titles.

        Documents are delimited with <标题起始符> (title start) and
        <标题终止符> (title end) markers. NOTE: "tile" in the method name is a
        historic typo for "title"; kept for interface compatibility.
        """
        docs = []
        titles = []

        # str.strip with a multi-char argument strips a *character set*; kept
        # as-is since titles/content are not expected to start with marker chars.
        raw_doc_list = docs_.strip("<标题起始符>").split("<标题起始符>")

        for raw_doc in raw_doc_list:
            title = raw_doc.split('<标题终止符>')[0].strip()
            content = raw_doc.split('<标题终止符>')[1].strip()

            docs.append({'title': title, 'document': content})
            titles.append(title)

        return docs, titles
offload_optimizer_device: none 6 | offload_param_device: none 7 | zero3_init_flag: false 8 | zero_stage: 2 9 | distributed_type: DEEPSPEED 10 | downcast_bf16: 'no' 11 | machine_rank: 0 12 | main_training_function: main 13 | mixed_precision: 'bf16' 14 | num_machines: 1 15 | num_processes: 8 16 | rdzv_backend: static 17 | same_network: true 18 | tpu_env: [] 19 | tpu_use_cluster: false 20 | tpu_use_sudo: false 21 | use_cpu: false 22 | -------------------------------------------------------------------------------- /train_router/accelerate_configs/deepspeed_zero3.yaml: -------------------------------------------------------------------------------- 1 | compute_environment: LOCAL_MACHINE 2 | debug: false 3 | deepspeed_config: 4 | deepspeed_multinode_launcher: standard 5 | offload_optimizer_device: none 6 | offload_param_device: none 7 | zero3_init_flag: true 8 | zero3_save_16bit_model: true 9 | zero_stage: 3 10 | distributed_type: DEEPSPEED 11 | downcast_bf16: 'no' 12 | machine_rank: 0 13 | main_training_function: main 14 | mixed_precision: bf16 15 | num_machines: 1 16 | num_processes: 8 17 | rdzv_backend: static 18 | same_network: true 19 | tpu_env: [] 20 | tpu_use_cluster: false 21 | tpu_use_sudo: false 22 | use_cpu: false 23 | -------------------------------------------------------------------------------- /train_router/accelerate_configs/fsdp_qlora.yaml: -------------------------------------------------------------------------------- 1 | compute_environment: LOCAL_MACHINE 2 | debug: false 3 | distributed_type: FSDP 4 | downcast_bf16: 'no' 5 | fsdp_config: 6 | fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP 7 | fsdp_backward_prefetch: BACKWARD_PRE 8 | fsdp_cpu_ram_efficient_loading: true 9 | fsdp_forward_prefetch: false 10 | fsdp_offload_params: true 11 | fsdp_sharding_strategy: FULL_SHARD 12 | fsdp_state_dict_type: SHARDED_STATE_DICT 13 | fsdp_sync_module_states: true 14 | fsdp_use_orig_params: false 15 | machine_rank: 0 16 | main_training_function: main 17 | 
# flake8: noqa
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
# regular:
python examples/scripts/dpo.py \
    --dataset_name=trl-internal-testing/hh-rlhf-helpful-base-trl-style \
    --model_name_or_path=gpt2 \
    --per_device_train_batch_size 4 \
    --learning_rate 1e-3 \
    --gradient_accumulation_steps 1 \
    --logging_steps 10 \
    --eval_steps 500 \
    --output_dir="dpo_anthropic_hh" \
    --warmup_steps 150 \
    --report_to wandb \
    --bf16 \
    --logging_first_step \
    --no_remove_unused_columns

# peft:
python examples/scripts/dpo.py \
    --dataset_name=trl-internal-testing/hh-rlhf-helpful-base-trl-style \
    --model_name_or_path=gpt2 \
    --per_device_train_batch_size 4 \
    --learning_rate 1e-3 \
    --gradient_accumulation_steps 1 \
    --logging_steps 10 \
    --eval_steps 500 \
    --output_dir="dpo_anthropic_hh" \
    --optim rmsprop \
    --warmup_steps 150 \
    --report_to wandb \
    --bf16 \
    --logging_first_step \
    --no_remove_unused_columns \
    --use_peft \
    --lora_r=16 \
    --lora_alpha=16
"""

import logging
import multiprocessing
import os
# Environment overrides must be set before the HF/trl imports below read
# them: route hub traffic through the mirror and disable wandb reporting.
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
os.environ["WANDB_DISABLED"] = "true"
from contextlib import nullcontext

from trl.commands.cli_utils import DPOScriptArguments, init_zero_verbose, TrlParser
from trl.env_utils import strtobool

# Opt into rich console output via the TRL_USE_RICH env var (off by default).
TRL_USE_RICH = strtobool(os.getenv("TRL_USE_RICH", "0"))

if TRL_USE_RICH:
    init_zero_verbose()
    FORMAT = "%(message)s"

    from rich.console import Console
    from rich.logging import RichHandler

import torch
from datasets import load_dataset, load_from_disk
from transformers import AutoModelForCausalLM, AutoTokenizer
from accelerate import PartialState
from trl import (
    DPOConfig,
    DPOTrainer,
    ModelConfig,
    RichProgressCallback,
    get_kbit_device_map,
    get_peft_config,
    get_quantization_config,
)


if TRL_USE_RICH:
    logging.basicConfig(format=FORMAT, datefmt="[%X]", handlers=[RichHandler()], level=logging.INFO)


if __name__ == "__main__":
    # Parse CLI flags / config file into script, training, and model configs.
    parser = TrlParser((DPOScriptArguments, DPOConfig, ModelConfig))
    args, training_args, model_config = parser.parse_args_and_config()

    # show all arguments
    print(args)
    print(training_args)
    print(model_config)

    # Force use our print callback
    if TRL_USE_RICH:
        training_args.disable_tqdm = True
        console = Console()

    ################
    # Model & Tokenizer
    ################
    # Resolve the requested dtype; "auto"/None are passed through unchanged.
    torch_dtype = (
        model_config.torch_dtype
        if model_config.torch_dtype in ["auto", None]
        else getattr(torch, model_config.torch_dtype)
    )
    quantization_config = get_quantization_config(model_config)
    model_kwargs = dict(
        revision=model_config.model_revision,
        attn_implementation=model_config.attn_implementation,
        torch_dtype=torch_dtype,
        # KV cache is incompatible with gradient checkpointing.
        use_cache=False if training_args.gradient_checkpointing else True,
        device_map=get_kbit_device_map() if quantization_config is not None else None,
        quantization_config=quantization_config,
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_config.model_name_or_path, trust_remote_code=model_config.trust_remote_code, **model_kwargs
    )
    peft_config = get_peft_config(model_config)
    if peft_config is None:
        # Without PEFT, DPO needs a frozen reference copy of the policy model.
        ref_model = AutoModelForCausalLM.from_pretrained(
            model_config.model_name_or_path, trust_remote_code=model_config.trust_remote_code, **model_kwargs
        )
    else:
        # With PEFT the base weights double as the implicit reference model.
        ref_model = None
    tokenizer = AutoTokenizer.from_pretrained(
        model_config.model_name_or_path, trust_remote_code=model_config.trust_remote_code
    )
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    if tokenizer.chat_template is None:
        tokenizer.chat_template = "{% for message in messages %}{{message['role'] + ': ' + message['content'] + '\n\n'}}{% endfor %}{{ eos_token }}"
    if tokenizer.bos_token is None:  # Qwen has no bos_token; it must be set here, otherwise DPO training raises an error.
        tokenizer.add_special_tokens({"bos_token": tokenizer.eos_token})
        tokenizer.bos_token_id = tokenizer.eos_token_id
    if args.ignore_bias_buffers:
        # torch distributed hack
        model._ddp_params_and_buffers_to_ignore = [
            name for name, buffer in model.named_buffers() if buffer.dtype == torch.bool
        ]

    ################
    # Optional rich context managers
    ###############
    init_context = nullcontext() if not TRL_USE_RICH else console.status("[bold green]Initializing the DPOTrainer...")
    save_context = (
        nullcontext()
        if not TRL_USE_RICH
        else console.status(f"[bold green]Training completed! Saving the model to {training_args.output_dir}")
    )

    ################
    # Dataset
    ################
    if "hh-rlhf-helpful-base-trl-style" in args.dataset_name:  # official case
        ds = load_dataset(args.dataset_name)
        # ds = load_from_disk(args.dataset_name)
    else:
        # Local dataset directory expected to contain train.json / test.json.
        data_files = {"train": f"{args.dataset_name}/train.json", "test": f"{args.dataset_name}/test.json"}
        ds = load_dataset("json", data_files=data_files)
        # ds = load_from_disk(args.dataset_name)
    if args.sanity_check:
        # Tiny subset for a quick end-to-end smoke run.
        for key in ds:
            ds[key] = ds[key].select(range(500))

    def process(row):
        # Render chat-formatted text: everything except the last turn becomes
        # the prompt; the final chosen/rejected turns are the two completions.
        row["prompt"] = tokenizer.apply_chat_template(row["chosen"][:-1], tokenize=False)
        row["chosen"] = tokenizer.apply_chat_template([row["chosen"][-1]], tokenize=False)
        row["rejected"] = tokenizer.apply_chat_template([row["rejected"][-1]], tokenize=False)
        return row

    # Compute that only on the main process for faster data processing.
    # see: https://github.com/huggingface/trl/pull/1255
    with PartialState().local_main_process_first():
        ds = ds.map(process, num_proc=training_args.dataset_num_proc)

    train_dataset = ds[args.dataset_train_split]
    eval_dataset = ds[args.dataset_test_split]

    ################
    # Training
    ################
    with init_context:
        trainer = DPOTrainer(
            model,
            ref_model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=eval_dataset,
            tokenizer=tokenizer,
            peft_config=peft_config,
            callbacks=[RichProgressCallback] if TRL_USE_RICH else None,
        )

    trainer.train()

    with save_context:
        trainer.save_model(training_args.output_dir)
none \ 38 | --bf16 \ 39 | --logging_first_step \ 40 | --max_prompt_length 512 \ 41 | --max_length 512 \ 42 | --no_remove_unused_columns > log/train_log/${tag}.log 2>&1 43 | 44 | echo "Done." 45 | -------------------------------------------------------------------------------- /utilizer.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | 4 | class Utilizer(): 5 | def __init__(self, llm, chunk_kb_path, graph_kb_path, table_kb_path, algorithm_kb_path, catalogue_kb_path): 6 | self.llm = llm 7 | self.chunk_kb_path = chunk_kb_path 8 | self.graph_kb_path = graph_kb_path 9 | self.table_kb_path = table_kb_path 10 | self.algorithm_kb_path = algorithm_kb_path 11 | self.catalogue_kb_path = catalogue_kb_path 12 | 13 | def do_decompose(self, query, kb_info, data_id): 14 | print(f"data_id: {data_id}, do_decompose...") 15 | 16 | raw_prompt = open("prompts/decompose.txt", "r").read() 17 | prompt = raw_prompt.format( 18 | query=query, 19 | kb_info=kb_info 20 | ) 21 | output = self.llm.response(prompt) 22 | subqueries = output.split("\n") 23 | 24 | return subqueries 25 | 26 | def do_extract(self, query, subqueries, chosen, data_id, extra_instruction=None): 27 | print(f"data_id: {data_id}, extraction...") 28 | 29 | if extra_instruction != None: 30 | subqueries = [subquery + extra_instruction for subquery in subqueries] 31 | 32 | if chosen == "chunk": 33 | subknowledges = self.do_extract_chunk(query, subqueries, data_id) 34 | elif chosen == "table": 35 | subknowledges = self.do_extract_table(query, subqueries, data_id) 36 | elif chosen == "graph": 37 | subknowledges = self.do_extract_graph(query, subqueries, data_id) 38 | elif chosen == "algorithm": 39 | subknowledges = self.do_extract_algorithm(query, subqueries, data_id) 40 | elif chosen == "catalogue": 41 | subknowledges = self.do_extract_catalogue(query, subqueries, data_id) 42 | else: 43 | raise ValueError("chosen should be in ['chunk', 'table', 'graph', 'algorithm', 
class Utilizer():
    """Answers a query against a previously constructed knowledge base:
    decompose the query into subqueries, extract evidence per structure
    type, then merge the retrieved pieces into one answer."""

    def __init__(self, llm, chunk_kb_path, graph_kb_path, table_kb_path, algorithm_kb_path, catalogue_kb_path):
        # llm: any object exposing `response(prompt) -> str`.
        # *_kb_path: directories holding the per-data_id JSON KB shards.
        self.llm = llm
        self.chunk_kb_path = chunk_kb_path
        self.graph_kb_path = graph_kb_path
        self.table_kb_path = table_kb_path
        self.algorithm_kb_path = algorithm_kb_path
        self.catalogue_kb_path = catalogue_kb_path

    @staticmethod
    def _load_kb(path):
        # Load one KB shard; the original `json.load(open(...))` never
        # closed the file handle.
        with open(path) as f:
            return json.load(f)

    def do_decompose(self, query, kb_info, data_id):
        """Split `query` into subqueries (one per LLM output line)."""
        print(f"data_id: {data_id}, do_decompose...")

        with open("prompts/decompose.txt", "r") as f:
            raw_prompt = f.read()
        prompt = raw_prompt.format(
            query=query,
            kb_info=kb_info
        )
        output = self.llm.response(prompt)
        subqueries = output.split("\n")

        return subqueries

    def do_extract(self, query, subqueries, chosen, data_id, extra_instruction=None):
        """Dispatch evidence extraction to the handler for `chosen`.

        Raises:
            ValueError: for an unknown `chosen` value.
        """
        print(f"data_id: {data_id}, extraction...")

        if extra_instruction is not None:  # fixed: identity check, not `!= None`
            subqueries = [subquery + extra_instruction for subquery in subqueries]

        # Dict dispatch replaces the original if/elif chain.
        extractors = {
            "chunk": self.do_extract_chunk,
            "table": self.do_extract_table,
            "graph": self.do_extract_graph,
            "algorithm": self.do_extract_algorithm,
            "catalogue": self.do_extract_catalogue,
        }
        if chosen not in extractors:
            raise ValueError("chosen should be in ['chunk', 'table', 'graph', 'algorithm', 'catalogue']")

        return extractors[chosen](query, subqueries, data_id)

    def do_extract_chunk(self, query, subqueries, data_id):
        """Query every stored chunk with all subqueries joined together."""
        chunks = self._load_kb(f"{self.chunk_kb_path}/data_{data_id}.json")

        composed_query = "\n".join(subqueries)

        subknowledges = []
        for c, chunk in enumerate(chunks):
            print(f"retrieve chunk {c}/{len(chunks)} in chunks ..")

            prompt = f"Instruction:\nAnswer the Query based on the given Document.\n\nQuery:\n{composed_query}\n\nDocument:\n{chunk}\n\nOutput:"
            tmp_output = self.llm.response(prompt)
            # Chunks are stored as "title: content", so the title is the
            # prefix before the first colon.
            title = chunk.split(":")[0]
            subknowledges.append(f"Retrieval result for {title}: {tmp_output}")

        return subknowledges

    def do_extract_table(self, query, subqueries, data_id):
        """Filter the stored tables against each subquery."""
        print(f"data_id: {data_id}, do_extract_table...")

        tables = self._load_kb(f"{self.table_kb_path}/data_{data_id}.json")
        tables_content = ""
        for t, table in enumerate(tables):
            tables_content += f"Table {t+1}:\n{table}\n\n"

        subknowledges = []
        for s, subquery in enumerate(subqueries):
            print(f"data_id: {data_id}, do_extract_table... in subquery {s}/{len(subqueries)} in subqueries ..")
            prompt = f"Instruction:\nThe following Tables show multiple independent tables built from multiple documents.\nFilter these tables according to the query, retaining only the table information that helps answer the query.\nNote that you need to analyze the attributes and entities mentioned in the query and filter accordingly.\nThe information needed to answer the query must exist in one or several tables, and you need to check these tables one by one.\n\nTables:{tables_content}\n\nQuery:{subquery}\n\nOutput:"
            retrieval = self.llm.response(prompt)
            subknowledges.append(retrieval)

        return subknowledges

    def do_extract_graph(self, query, subqueries, data_id):
        """Filter the stored graph triples against each subquery."""
        print(f"data_id: {data_id}, do_extract_graph...")

        graphs = self._load_kb(f"{self.graph_kb_path}/data_{data_id}.json")
        graphs_content = "\n\n".join(graphs)

        subknowledges = []
        for s, subquery in enumerate(subqueries):
            print(f"data_id: {data_id}, do_extract_graph... in subquery {s}/{len(subqueries)} in subqueries ..")
            prompt = f"Instruction: According to the query, filter out the triples from all triples in the graph that can help answer the query.\nNote, carefully analyze the entities and relationships mentioned in the query and filter based on this information.\n\nGraphs:{graphs_content}\n\nQuery:{subquery}\n\nOutput:"
            retrieval = self.llm.response(prompt)
            subknowledges.append(retrieval)

        return subknowledges

    def do_extract_algorithm(self, query, subqueries, data_id):
        """Filter the stored algorithm descriptions against each subquery."""
        print(f"data_id: {data_id}, do_extract_algorithm...")

        algorithms = self._load_kb(f"{self.algorithm_kb_path}/data_{data_id}.json")
        algorithms_content = "\n\n".join(algorithms)

        subknowledges = []
        for s, subquery in enumerate(subqueries):
            print(f"data_id: {data_id}, do_extract_algorithm... in subquery {s}/{len(subqueries)} in subqueries ..")
            prompt = f"Instruction: According to the query, filter out information from algorithm descriptions that can help answer the query.\nNote, carefully analyze the entities and relationships mentioned in the query and filter based on this information.\n\nAlgorithms:{algorithms_content}\n\nQuery:{subquery}\n\nOutput:"
            retrieval = self.llm.response(prompt)
            subknowledges.append(retrieval)

        return subknowledges

    def do_extract_catalogue(self, query, subqueries, data_id):
        """Filter the stored catalogues against each subquery."""
        print(f"data_id: {data_id}, do_extract_catalogue...")

        catalogues = self._load_kb(f"{self.catalogue_kb_path}/data_{data_id}.json")
        catalogues_content = "\n\n".join(catalogues)

        subknowledges = []
        for s, subquery in enumerate(subqueries):
            print(f"data_id: {data_id}, do_extract_catalogue... in subquery {s}/{len(subqueries)} in subqueries ..")
            prompt = f"Instruction: According to the query, filter out information from the catalogue that can help answer the query.\nNote, carefully analyze the entities and relationships mentioned in the query and filter based on this information.\n\nCatalogues:{catalogues_content}\n\nQuery:{subquery}\n\nOutput:"
            retrieval = self.llm.response(prompt)
            subknowledges.append(retrieval)

        return subknowledges

    def do_merge(self, query, subqueries, subknowledges, chosen, data_id):
        """Combine the retrieved evidence and ask the LLM for a final answer.

        Returns:
            (answer, decision, new_query) — decision/new_query are fixed
            "No" placeholders (no iterative retrieval yet).
        Raises:
            ValueError: for an unknown `chosen` value.
        """
        print(f"data_id: {data_id}, do_merge...")

        retrieval_of_chunk = ""
        retrieval_of_graph = ""
        retrieval_of_table = ""
        retrieval_of_algorithm = ""
        retrieval_of_catalogue = ""

        # chunk/catalogue results were produced per-document for the whole
        # query, so they are joined under the original query; the other types
        # pair each subquery with its own retrieval.
        if chosen == "chunk":
            subknowledges = "\n".join(subknowledges)
            retrieval_of_chunk += f"Subquery: {query}\nRetrieval results:\n{subknowledges}\n\n"
        elif chosen == "table":
            for subquery, subknowledge in zip(subqueries, subknowledges):
                retrieval_of_table += f"Subquery: {subquery}\nRetrieval results:\n{subknowledge}\n\n"
        elif chosen == "graph":
            for subquery, subknowledge in zip(subqueries, subknowledges):
                retrieval_of_graph += f"Subquery: {subquery}\nRetrieval results:\n{subknowledge}\n\n"
        elif chosen == "algorithm":
            for subquery, subknowledge in zip(subqueries, subknowledges):
                retrieval_of_algorithm += f"Subquery: {subquery}\nRetrieval results:\n{subknowledge}\n\n"
        elif chosen == "catalogue":
            subknowledges = "\n".join(subknowledges)
            retrieval_of_catalogue += f"Subquery: {query}\nRetrieval results:\n{subknowledges}\n\n"
        else:
            raise ValueError("chosen should be in ['chunk', 'table', 'graph', 'algorithm', 'catalogue']")

        decision = "No"
        new_query = "No"
        instruction = "1. Answer the Question based on retrieval results. \n2. Find the relevant information from given retrieval results and output as detailed, specific, and lengthy as possible. \n3. The output must be a coherent and smooth piece of text."
        prompt = f"Instruction:\n{instruction}\n\nQuestion:\n{query}\n\nRetrieval:\n{retrieval_of_chunk}{retrieval_of_graph}{retrieval_of_table}{retrieval_of_algorithm}{retrieval_of_catalogue}"

        answer = self.llm.response(prompt)

        return answer, decision, new_query
import time
import json
import requests
import os
from transformers import AutoTokenizer


class QwenAPI():
    """Thin HTTP client for a Qwen model server.

    NOTE(review): assumes the endpoint speaks an OpenAI-compatible
    chat/completions JSON protocol — confirm against the serving stack.
    """

    def __init__(self, url):
        # url: full endpoint URL of the model server.
        self.url = url

        # A gpt2 tokenizer is used only to *estimate* prompt length in tokens;
        # it is not the Qwen tokenizer, so counts are approximate.
        print("loading tokenizer")
        if os.path.exists("/mnt/data/lizhuoqun/hf_models/gpt2"):
            self.tokenizer = AutoTokenizer.from_pretrained("/mnt/data/lizhuoqun/hf_models/gpt2")
        else:
            raise Exception("No model path found")
        print("loading tokenizer done")

    def response(self, input_text, max_new_tokens=4096):
        """Send `input_text` as a single user message; return the reply text.

        Retries up to 3 times; truncates the prompt proportionally (by
        characters) when it exceeds the ~128k-token context window.
        Raises Exception when no response is obtained after all retries.
        """
        current_time = time.time()

        input_text_len = len(self.tokenizer(input_text)['input_ids'])
        print(f"input_text_len: {input_text_len}")
        if input_text_len > 128000:
            # Character-level proportional truncation to roughly 128k tokens.
            print(f"input_text_len: {input_text_len}", "we reduce the input_text_len")
            input_text = input_text[:int(len(input_text)*(128000/input_text_len))]

        url = self.url
        headers = {
            # "Content-Type": "application/json",
            "Authorization": "EMPTY"
        }
        raw_info = {
            "model": "Qwen",
            "messages": [{"role": "user", "content": input_text}],
            # Fixed seed for reproducible generations.
            "seed": 1024,
            "max_tokens": max_new_tokens
        }

        data = json.dumps(raw_info)
        # print(data)

        try_time = 0
        response = None
        while try_time < 3:
            try_time += 1

            try:
                # (connect, read) timeouts in seconds — effectively unbounded.
                callback = requests.post(url, headers=headers, data=data, timeout=(10000, 10000))
                print("callback.status_code", callback.status_code)
                print(f"prompt_tokens: {callback.json()['usage']['prompt_tokens']}, total_tokens: {callback.json()['usage']['total_tokens']}, completion_tokens: {callback.json()['usage']['completion_tokens']}")
            except Exception as e:
                # Network/parse failure: retry from the top.
                print(f"(print in qwenapi.py callback, try_time {try_time}) Error: {e}")
                continue

            try:
                result = callback.json()
                # print(result)
                # print(result.keys())
                response = result['choices'][0]['message']['content']
                # print(response)
                # input()
                break
            except Exception as e:
                print(f"(print in qwenapi.py response, try_time {try_time}) callback: {callback.json()} Error: {e}")
                # Server-side length error: parse the reported token count out
                # of the error message, shrink the prompt accordingly, retry.
                if "Please reduce the length of the messages" in callback.json()['message']:
                    current_tokne_len = callback.json()['message'].split("However, you requested")[1].split("tokens in the messages, Please")[0].strip()
                    current_tokne_len = int(current_tokne_len)
                    print(f"current_tokne_len: {current_tokne_len}")
                    raw_info = {
                        "model": "Qwen",
                        "messages": [{"role": "user", "content": input_text[:int(len(input_text)*(128000/current_tokne_len))]}],
                        "seed": 1024,
                        "max_tokens": max_new_tokens
                    }
                    data = json.dumps(raw_info)
                    continue

        if response is None:
            raise Exception(f"response is None")

        print("used time in this qwenapi:", (time.time()-current_time)/60, "min")
        return response