├── .gitignore
├── assets
│   ├── overview.png
│   ├── dev_set_results.png
│   └── test_set_results.png
├── JUnit
│   ├── jsoup-1.17.2.jar
│   ├── lombok-1.18.24.jar
│   ├── fastjson2-2.0.41.jar
│   ├── junit-jupiter-api-5.8.1.jar
│   ├── junit-jupiter-engine-5.8.1.jar
│   ├── junit-platform-engine-1.8.1.jar
│   ├── junit-platform-commons-1.8.1.jar
│   ├── junit-platform-launcher-1.7.2.jar
│   └── junit-platform-console-standalone-1.7.0.jar
├── ncb
│   ├── utils.py
│   ├── evaluate.py
│   ├── exec_java.py
│   ├── exec_python.py
│   ├── execution.py
│   └── test_setup.py
├── README.md
└── results
    └── reference
        ├── reference_ncb_python_en.jsonl
        └── reference_ncb_python_zh.jsonl

/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | __pycache__
3 | .DS_Store
--------------------------------------------------------------------------------
/assets/overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/NaturalCodeBench/HEAD/assets/overview.png
--------------------------------------------------------------------------------
/JUnit/jsoup-1.17.2.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/NaturalCodeBench/HEAD/JUnit/jsoup-1.17.2.jar
--------------------------------------------------------------------------------
/JUnit/lombok-1.18.24.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/NaturalCodeBench/HEAD/JUnit/lombok-1.18.24.jar
--------------------------------------------------------------------------------
/JUnit/fastjson2-2.0.41.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/NaturalCodeBench/HEAD/JUnit/fastjson2-2.0.41.jar
--------------------------------------------------------------------------------
/assets/dev_set_results.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/NaturalCodeBench/HEAD/assets/dev_set_results.png
--------------------------------------------------------------------------------
/assets/test_set_results.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/NaturalCodeBench/HEAD/assets/test_set_results.png
--------------------------------------------------------------------------------
/JUnit/junit-jupiter-api-5.8.1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/NaturalCodeBench/HEAD/JUnit/junit-jupiter-api-5.8.1.jar
--------------------------------------------------------------------------------
/JUnit/junit-jupiter-engine-5.8.1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/NaturalCodeBench/HEAD/JUnit/junit-jupiter-engine-5.8.1.jar
--------------------------------------------------------------------------------
/JUnit/junit-platform-engine-1.8.1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/NaturalCodeBench/HEAD/JUnit/junit-platform-engine-1.8.1.jar
--------------------------------------------------------------------------------
/JUnit/junit-platform-commons-1.8.1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/NaturalCodeBench/HEAD/JUnit/junit-platform-commons-1.8.1.jar -------------------------------------------------------------------------------- /JUnit/junit-platform-launcher-1.7.2.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/NaturalCodeBench/HEAD/JUnit/junit-platform-launcher-1.7.2.jar -------------------------------------------------------------------------------- /JUnit/junit-platform-console-standalone-1.7.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/NaturalCodeBench/HEAD/JUnit/junit-platform-console-standalone-1.7.0.jar -------------------------------------------------------------------------------- /ncb/utils.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import json 3 | import os 4 | import shutil 5 | from pathlib import Path 6 | from typing import List, Union 7 | import numpy as np 8 | 9 | 10 | def load_json(path): 11 | return json.load(open(path, encoding='utf-8')) 12 | 13 | 14 | def save_json(obj, path): 15 | with open(path, 'w', encoding='utf-8') as f: 16 | json.dump(obj, f, ensure_ascii=False, indent=2) 17 | 18 | 19 | def load_jsonl(path): 20 | res = [] 21 | with open(path, encoding='utf-8') as f: 22 | for line in f: 23 | res.append(json.loads(line)) 24 | return res 25 | 26 | 27 | def save_jsonl(obj, path): 28 | with open(path, 'w', encoding='utf-8') as f: 29 | for item in obj: 30 | f.write(json.dumps(item, ensure_ascii=False)+'\n') 31 | 32 | 33 | def del_file(path): 34 | for elm in Path(path).glob('*'): 35 | elm.unlink() if elm.is_file() else shutil.rmtree(elm) 36 | if os.path.exists(path): 37 | os.rmdir(path) 38 | 39 | 40 | def estimate_pass_at_k( 41 | num_samples: Union[int, List[int], np.ndarray], 42 | num_correct: Union[List[int], np.ndarray], 43 | k: int 44 | ) -> np.ndarray: 45 | """ 46 | Estimates pass@k of each problem and returns them in an array. 47 | """ 48 | 49 | def estimator(n: int, c: int, k: int) -> float: 50 | """ 51 | Calculates 1 - comb(n - c, k) / comb(n, k). 
52 | """ 53 | if n - c < k: 54 | return 1.0 55 | return 1.0 - np.prod(1.0 - k / np.arange(n - c + 1, n + 1)) 56 | 57 | if isinstance(num_samples, int): 58 | num_samples_it = itertools.repeat(num_samples, len(num_correct)) 59 | else: 60 | assert len(num_samples) == len(num_correct) 61 | num_samples_it = iter(num_samples) 62 | 63 | return np.array([estimator(int(n), int(c), k) for n, c in zip(num_samples_it, num_correct)]) 64 | -------------------------------------------------------------------------------- /ncb/evaluate.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from pathlib import Path 4 | from test_setup import write_test_files, extract_codes 5 | from execution import execution 6 | from utils import load_json, load_jsonl, save_json, save_jsonl, del_file 7 | 8 | 9 | def evaluate_code(data_dir, language, natural_lang, ckpt_name, ks, num_workers=64): 10 | file = f'results/{ckpt_name}/{ckpt_name}_ncb_{language}_{natural_lang}.jsonl' 11 | print(file) 12 | os.makedirs(data_dir, exist_ok=True) 13 | del_file(data_dir / f'{ckpt_name}') 14 | 15 | data = load_jsonl(file) 16 | dataset_size = len(data) 17 | testcases = load_jsonl(f'data/{language}_{natural_lang}/ncb_{language}_{natural_lang}.jsonl') 18 | 19 | print("Extracting code from response") 20 | data = extract_codes(data, testcases, language) 21 | 22 | print("Writing test files") 23 | input_files_path = Path(f'data/{language}_{natural_lang}/input_files') 24 | test_dir = write_test_files(data_dir, input_files_path, data, language, ckpt_name) 25 | 26 | print("Start evaluation") 27 | result = execution(data_dir / test_dir, ckpt_name, language, natural_lang, dataset_size, ks, num_workers) 28 | print(result, '\n') 29 | save_json(result, data_dir / test_dir / 'result.json') 30 | 31 | 32 | if __name__ == '__main__': 33 | parser = argparse.ArgumentParser() 34 | parser.add_argument('--languages', type=str, nargs='+', default=['python', 'java'], help='programming language') 35 | parser.add_argument('--natural_langs', type=str, nargs='+', default=['zh', 'en'], help='natural language') 36 | parser.add_argument('--ckpt_name', type=str, required=True, default='reference', help='the name of ckpt that you want to evaluate') 37 | parser.add_argument('--num_workers', type=int, default=64, help='number of workers') 38 | parser.add_argument('--ks', type=int, nargs='+', default=[1], help='k of the pass@k') 39 | args = parser.parse_args() 40 | 41 | results = [] 42 | for nat_lang in args.natural_langs: 43 | for lang in args.languages: 44 | data_dir = Path(f'data/temp/{lang}_{nat_lang}_test') 45 | evaluate_code(data_dir, lang, nat_lang, args.ckpt_name, args.ks, args.num_workers) 46 | results.append(load_json(data_dir / f'{args.ckpt_name}/result.json')) 47 | save_jsonl(results, f'results/{args.ckpt_name}/results.jsonl') 48 | for res in results: 49 | print(res) 50 | -------------------------------------------------------------------------------- /ncb/exec_java.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import copy 3 | import os 4 | from pathlib import Path 5 | from utils import save_json 6 | 7 | 8 | def excute(tmp_dir: str, JUnit_path: str, timeout: int): 9 | if not tmp_dir: 10 | raise ValueError(f"pls enter the path for executing java") 11 | JUnit = f':{JUnit_path}/*' 12 | tr_file = "TestRunner.java" 13 | tr_name = "TestRunner" 14 | compilation_result = subprocess.run(["javac", "-cp",JUnit,tr_file], cwd=tmp_dir, 
timeout=timeout,capture_output=True) 15 | compile_returncode = compilation_result.returncode 16 | if compile_returncode == 0: 17 | try: 18 | exec_result = subprocess.run(["java", "-cp",JUnit,tr_name], cwd=tmp_dir, timeout=timeout,capture_output=True) 19 | if exec_result.returncode == 0: 20 | res = exec_result.stdout.decode() 21 | elif exec_result.returncode == 1: 22 | res = f"failed: execute error:\n{exec_result.stderr.decode()}" 23 | except subprocess.TimeoutExpired as e: 24 | res = "time out" 25 | except BaseException as e: 26 | res = f"failed: {e}" 27 | else: 28 | compile_error = compilation_result.stderr.decode('utf-8') 29 | res = f'failed: compilation error:\n{compile_error}' 30 | return res 31 | 32 | 33 | def eval_test(class_name, base_dir, JUnit_path, timeout: int): 34 | item = {} 35 | _id = class_name[class_name.find('_')+1:class_name.rfind('_')] 36 | base_dir = copy.deepcopy(base_dir) 37 | tmp_dir = Path(base_dir, class_name) 38 | compile_error = "" 39 | exe_error = "" 40 | _id = _id = str(class_name)[str(class_name).find('_')+1:str(class_name).rfind('_')] 41 | if not os.listdir(tmp_dir): 42 | item['_id'] = _id 43 | item['class_name'] = class_name 44 | item["compile_error"] = compile_error 45 | item["exe_error"] = exe_error 46 | item["test_result"] = 'nocode' 47 | save_json(item, Path(base_dir, tmp_dir, 'eval_report.json')) 48 | return item 49 | res = excute(tmp_dir, JUnit_path, timeout=timeout) 50 | if 'failed: compilation error' in res: 51 | compile_error = res 52 | 53 | if 'failed: execute error:' in res: 54 | exe_error = res 55 | 56 | item['_id'] = _id 57 | item['class_name'] = class_name 58 | item["compile_error"] = compile_error 59 | item["exe_error"] = exe_error 60 | item["test_result"] = res 61 | 62 | save_json(item, Path(base_dir, tmp_dir, 'eval_report.json')) 63 | return item 64 | -------------------------------------------------------------------------------- /ncb/exec_python.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import sys 4 | sys.path.append(os.getcwd()) 5 | from pathlib import Path 6 | import pytest 7 | from pytest_jsonreport.plugin import JSONReport 8 | from utils import save_jsonl, save_json 9 | 10 | 11 | def level(rate): 12 | if rate == 0: 13 | return '0' 14 | elif 0 < rate < 30: 15 | return '0_30' 16 | elif 30 <= rate < 60: 17 | return '30_60' 18 | elif 60 <= rate < 100: 19 | return '60_100' 20 | elif rate == 100: 21 | return '100' 22 | 23 | 24 | def run_python_test(base_dir, test_dir): 25 | _id = test_dir[test_dir.rfind('_')+1:] 26 | os.chdir(Path(base_dir, test_dir)) 27 | 28 | overall_result = {'total': 0, 'passed': 0, 'failed': 0} 29 | pass_statistics = {'0': 0, '0_30': 0, '30_60': 0, '60_100': 0, '100': 0} 30 | error = {'SyntaxError': 0, 'RuntimeError': 0} 31 | error_report = [] 32 | error_cases = 0 33 | 34 | plugin = JSONReport() 35 | pytest.main(['-s', '--cov', '--cov-report', 'json', '--disable-warnings'], plugins=[plugin]) 36 | 37 | if len(plugin.report.get('tests')) == 0: 38 | longrepr = plugin.report['collectors'][1]['longrepr'] 39 | error_report.append({"_id": _id, 'nodeid': plugin.report['collectors'][1]['nodeid'], 'longrepr': longrepr}) 40 | if "SyntaxError" in longrepr: 41 | error['SyntaxError'] += 1 42 | else: 43 | error['RuntimeError'] += 1 44 | save_json(error, Path(base_dir, test_dir, 'error.json')) 45 | save_jsonl(error_report, Path(base_dir, test_dir, 'error_report.jsonl')) 46 | save_json({'overall_result': overall_result, 'pass_cases': pass_statistics}, 47 | 
Path(base_dir, test_dir, 'raw_result.json')) 48 | return 49 | try: 50 | error_report.append({"_id": _id, "testcases_traceback": []}) 51 | for case in plugin.report.get('tests'): 52 | print(case) 53 | if 'traceback' not in case['call']: 54 | continue 55 | if 'AssertionError' not in case['call']['traceback'][0]['message']: 56 | error_cases += 1 57 | error_report[0]['testcases_traceback'].append({'nodeid': case['nodeid'], 58 | 'traceback': case['call']['longrepr']}) 59 | except Exception as e: 60 | pass 61 | error['RuntimeError'] += 1 if error_cases != 0 else 0 62 | 63 | summary = plugin.report.get("summary") 64 | passed = summary.get("passed", 0) 65 | total = summary.get("total", 0) 66 | overall_result['passed'] += passed 67 | overall_result['total'] += total 68 | if total != 0: 69 | pass_rate = (passed / total) * 100 70 | pass_statistics[level(pass_rate)] += 1 71 | 72 | if len(error_report[0]['testcases_traceback']) == 0: 73 | error_report = [] 74 | save_jsonl(error_report, Path(base_dir, test_dir, 'error_report.jsonl')) 75 | save_json(error, Path(base_dir, test_dir, 'error.json')) 76 | save_json({'overall_result': overall_result, 'pass_cases': pass_statistics}, 77 | Path(base_dir, test_dir, 'raw_result.json')) 78 | 79 | 80 | if __name__ == '__main__': 81 | parser = argparse.ArgumentParser() 82 | parser.add_argument('--base_dir', type=str, required=True) 83 | parser.add_argument('--test_dir', type=str, required=True) 84 | args = parser.parse_args() 85 | 86 | run_python_test(args.base_dir, args.test_dir) 87 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # NaturalCodeBench (ACL 2024 Findings) 2 | 3 | This repository contains information, data and code of NaturalCodeBench: A Challenging Application-Driven Dataset for Code Synthesis Evaluation. 4 | 5 | ## 📌Introduction 6 | 7 | We propose NaturalCodeBench (NCB), a comprehensive code benchmark designed to mirror the complexity and variety of scenarios in real coding tasks. NCB comprises 402 high-quality problems in Python and Java, meticulously selected from an online coding service, covering 6 different domains. 8 | 9 |  10 | 11 | The overall framework of NaturalCodeBench is shown in the above image, including the data collection pipeline and the semi-automated pipeline. 12 | 13 | For a full description of NaturalCodeBench, please refer to the paper: https://arxiv.org/abs/2405.04520 14 | 15 | ## Dataset Summary 16 | 17 | To construct a challenging application-driven dataset for code synthesis evaluation, the seed problems of NCB are cleaned from the queries in coding online services, spanning across 6 domains: Artificial Intelligence, Data Science, Algorithm and Data Structure, Front-End, Software Engineering, and System Administration. 18 | 19 |
| Domains                      | #Problems (Dev) | #Problems (Test) | #Problems (Total) |
|------------------------------|-----------------|------------------|--------------------|
| Software Engineering         | 44              | 88               | 132                |
| Data Science                 | 32              | 68               | 100                |
| Algorithm and Data Structure | 22              | 73               | 95                 |
| System Administration        | 22              | 17               | 33                 |
| Artificial Intelligence      | 15              | 13               | 28                 |
| Front-End                    | 11              | 3                | 14                 |
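The reference predictions under `results/reference/` illustrate the input format that `ncb/evaluate.py` expects: one JSON object per line, each carrying an `_id` and a `response` field whose value is the model output with a fenced code block. A minimal sketch of preparing such a file and estimating pass@k with the helper from `ncb/utils.py` might look like the following; the checkpoint name, paths, and sample counts are illustrative only, and it assumes the repository root as working directory with `ncb/` on `PYTHONPATH`:

```python
import os

# Assumption: run from the repository root with ncb/ on PYTHONPATH,
# so the helpers defined in ncb/utils.py are importable directly.
from utils import estimate_pass_at_k, save_jsonl

# One record per problem, mirroring the reference files below:
# {"_id": <problem id>, "response": "<model output containing a ```python block>"}
outputs = [
    {"_id": 131, "response": "```python\ndef word_count(file_path):\n    ...\n```"},
]

# evaluate.py reads results/<ckpt_name>/<ckpt_name>_ncb_<language>_<natural_lang>.jsonl;
# "my_model" is a placeholder checkpoint name.
os.makedirs("results/my_model", exist_ok=True)
save_jsonl(outputs, "results/my_model/my_model_ncb_python_en.jsonl")

# Per-problem pass@k is estimated as 1 - C(n - c, k) / C(n, k), where n is the number
# of samples per problem and c the number of correct ones (see ncb/utils.py); the
# benchmark score is typically the mean over problems.
per_problem = estimate_pass_at_k(num_samples=10, num_correct=[2, 0, 7], k=1)
print(per_problem.mean())
```

Scoring itself is then driven by `python evaluate.py --ckpt_name my_model --languages python --natural_langs en` (see the argument parser in `ncb/evaluate.py` above); it additionally requires the problem and test-case files under `data/<language>_<natural_lang>/`, which are not included in this listing.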
\\1'\n # 应用替换\n text = re.sub(pattern, replacement, text, flags=re.DOTALL)\n\n return text\n\n\n\n```"}
63 | {"_id": 193, "response": "```python\n\ndef calculate_et(delta, rn, g, gamma, t, u2, es, ea):\n numerator = 0.408 * delta * (rn - g) + gamma * (900 / (t + 273)) * u2 * (es - ea)\n denominator = delta + gamma * (1 + 0.34 * u2)\n if denominator == 0:\n return 0\n et = numerator / denominator\n\n print(et)\n return et\n\n\n```"}
64 | {"_id": 194, "response": "```python\n\nfrom lxml import etree\n\nhtml = \"\"\"\n\n 3131\n AAAA\n BBB\n CCC\n
\n\"\"\"\n\nroot = etree.fromstring(html)\n\ndef has_strong_or_em(tag):\n if tag.find('strong') is not None or tag.find('em') is not None:\n return True\n return False\n\nfor p in root.iter('p'):\n if has_strong_or_em(p):\n print(\"P tag has strong or em tag\")\n else:\n print(\"P tag does not have strong or em tag\")\n\n\n```"} 65 | {"_id": 195, "response": "```python\n\nimport pandas as pd\n\ndef flatten_dict_to_rows(d):\n def flatten_dict(d, parent_key='', sep='.'):\n items = []\n for k, v in d.items():\n new_key = f\"{parent_key}{sep}{k}\" if parent_key else k\n if isinstance(v, dict):\n if not v:\n items.append((new_key, {}))\n else:\n items.extend(flatten_dict(v, new_key, sep=sep).items())\n else:\n items.append((new_key, v))\n return dict(items)\n\n flat_dict = flatten_dict(d)\n\n # Convert the flattened dictionary to a pandas DataFrame\n df = pd.DataFrame(list(flat_dict.items()), columns=['Key', 'Value'])\n print(df.to_dict())\n return df\n\n\n```"} 66 | {"_id": 196, "response": "```python\n\ndef is_mac_in_range(mac, mac_range):\n def mac_to_hex(mac_str):\n return mac_str.replace(':', '').replace('-', '').upper()\n\n # 解析MAC地址范围\n start_mac_str, end_mac_str = mac_range.split('-')\n start_mac = int(mac_to_hex(start_mac_str), 16)\n end_mac = int(mac_to_hex(end_mac_str), 16)\n mac_val = int(mac_to_hex(mac), 16)\n\n # 判断MAC地址是否在范围内\n return start_mac <= mac_val <= end_mac\n\n\n```"} 67 | {"_id": 197, "response": "```python\n\nimport numpy as np\nfrom scipy.optimize import curve_fit\n\n# Define the fractional order model with 2 constant phase elements and 3 resistors\ndef fractional_order_model(t, Q, R1, R2, R3, n1, n2):\n return Q * (1 - (1 / (R1 * (t ** n1) + 1)) - (1 / (R2 * (t ** n2) + 1)) - (1 / R3))\n\n# # Sample time and current data\n# time_data = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])\n# current_data = np.array([0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0])\n#\n# # Fit the model to the data\n# popt, pcov = curve_fit(fractional_order_model, time_data, current_data, bounds=(0, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]))\n# print(len(popt))\n# # The estimated battery capacity in Amp hour is the first parameter of the model\n# estimated_capacity_Ah = popt[0]\n#\n# print(\"Estimated battery capacity: {:.2f} Ah\".format(estimated_capacity_Ah))\n\n\n```"} 68 | {"_id": 198, "response": "```python\n\nimport numpy as np\n\ndef solve_quadratic(points):\n # Create matrix A using a list comprehension\n A = np.array([[x**2, x, 1] for x, y in points])\n # Create vector b\n b = np.array([y for x, y in points])\n # Use numpy.linalg.solve to solve for [a, b, c]\n coefficients = np.linalg.solve(A, b)\n\n return coefficients\n\n```"} 69 | {"_id": 199, "response": "```python\n\nimport ipaddress\n\ndef check_ip_overlap(ip_range1, ip_range2):\n ip_net1 = ipaddress.ip_network(ip_range1)\n ip_net2 = ipaddress.ip_network(ip_range2)\n\n return ip_net1.overlaps(ip_net2)\n\n\n\n```"} 70 | {"_id": 200, "response": "```python\n\nimport os\nimport re\n\ndef rename_files_in_folder(folder_path):\n for filename in os.listdir(folder_path):\n file_type = filename.split('.')[-1]\n new_filename = re.sub(\"[A-Za-z]\", \"\", filename[:-len(file_type)]) + file_type\n os.rename(os.path.join(folder_path, filename), os.path.join(folder_path, new_filename))\n\n# Call the function with the path to the folder\n# rename_files_in_folder('/path/to/your/folder')\n\n\n```"} 71 | -------------------------------------------------------------------------------- /results/reference/reference_ncb_python_zh.jsonl: 
-------------------------------------------------------------------------------- 1 | {"_id": 131, "response": "```python\nimport string\nfrom collections import Counter\n\n\ndef word_count(file_path):\n try:\n # 读取文件内容\n with open(file_path, 'r', encoding='utf-8') as file:\n content = file.read()\n\n # 移除标点符号并转换为小写\n translator = str.maketrans(\"\", \"\", string.punctuation)\n content = content.translate(translator).lower()\n\n # 使用 Counter 统计单词出现次数\n words = content.split()\n word_counter = Counter(words)\n\n # 按照出现次数降序排列\n sorted_word_count = sorted(word_counter.items(), key=lambda x: x[1], reverse=True)\n\n for word, count in sorted_word_count:\n print(f\"'{word}': {count}\")\n\n except FileNotFoundError:\n print(f\"Error: File '{file_path}' not found.\")\n```"} 2 | {"_id": 132, "response": "```python\nimport re\n\n\ndef remove_html_tags(s: str) -> str:\n if not isinstance(s, str):\n raise ValueError(\"Input must be a string\")\n\n if not s:\n return \"\"\n\n # 定义新的正则表达式模式\n tag_pattern = r\"<[^>]*>\"\n comment_pattern = r\"\"\n doctype_pattern = r\"]*>\"\n\n # 移除 HTML 注释和 DOCTYPE\n s = re.sub(comment_pattern, \"\", s)\n s = re.sub(doctype_pattern, \"\", s)\n\n # 移除标签\n s = re.sub(tag_pattern, \"\", s)\n\n # 返回处理后的字符串\n return s.strip()\n\n\n\n```"} 3 | {"_id": 133, "response": "```python\nfrom datetime import datetime\n\nclass Schedule:\n def __init__(self):\n self.events = {}\n\n def add_event(self, event_name, event_date):\n if event_date in self.events:\n self.events[event_date].append(event_name)\n else:\n self.events[event_date] = [event_name]\n\n def view_events(self):\n for event_date, event_names in self.events.items():\n print(f\"{event_date.strftime('%Y-%m-%d %H:%M')}:\")\n for event_name in event_names:\n print(f\"- {event_name}\")\n print()\n\n def remove_event(self, event_name, event_date):\n if event_date in self.events:\n if event_name in self.events[event_date]:\n self.events[event_date].remove(event_name)\n if not self.events[event_date]:\n del self.events[event_date]\n print(f\"已删除事件:{event_name}\")\n return\n print(f\"未找到事件'{event_name}'\")\n\n\n```"} 4 | {"_id": 134, "response": "```python\nclass ListNode:\n def __init__(self, value=0, next=None):\n self.value = value\n self.next = next\n\n\ndef add_two_numbers(l1, l2):\n carry = 0\n dummy_head = ListNode()\n current = dummy_head\n\n while l1 or l2 or carry:\n # 获取当前节点的值\n val1 = l1.value if l1 else 0\n val2 = l2.value if l2 else 0\n\n # 计算和以及进位\n total = val1 + val2 + carry\n carry, digit = divmod(total, 10)\n\n # 创建新节点并更新当前节点的next\n current.next = ListNode(digit)\n current = current.next\n\n # 移动到下一个节点\n if l1:\n l1 = l1.next\n if l2:\n l2 = l2.next\n\n return dummy_head.next\n```"} 5 | {"_id": 135, "response": "```python\nfrom typing import List\nimport xml.etree.ElementTree as ET\n\ndef parse_xml(file_path: str, tag_name: str) -> List[str]:\n\n try:\n root = ET.parse(file_path).getroot()\n except ET.ParseError:\n return []\n data_list = []\n for element in root.findall(tag_name):\n data_list.append(element.text)\n return data_list\n\n```"} 6 | {"_id": 136, "response": "```python\nimport re\n\n\ndef is_valid_variable_name(name):\n \"\"\"\n 判断字符串是否是有效的 JavaScript 变量名\n \"\"\"\n if len(name) > 255:\n return False\n if not re.match(r'^[a-zA-Z_$][a-zA-Z0-9_$]*$', name):\n return False\n return True\n\n```"} 7 | {"_id": 137, "response": "```python\nclass Order:\n def __init__(self, order_id):\n self.order_id = order_id\n self.items = []\n self.total_price = 0.0\n self.status = \"未支付\"\n\n def add_item(self, item_name, 
item_price):\n if not isinstance(item_price, (int, float)) or item_price <= 0:\n return \"商品价格必须是正数\"\n\n item = {\"name\": item_name, \"price\": item_price}\n self.items.append(item)\n\n def remove_item(self, item_name):\n for item in self.items:\n if item[\"name\"] == item_name:\n self.items.remove(item)\n break\n else:\n return \"该商品不存在于订单中\"\n\n def calculate_total_price(self):\n self.total_price = sum(item[\"price\"] for item in self.items)\n\n def pay_order(self):\n if not self.items:\n return \"订单中没有商品\"\n self.status = \"已支付\"\n\n def cancel_order(self):\n self.status = \"已取消\"\n\n def view_order(self):\n print(\"订单编号:\", self.order_id)\n print(\"商品列表:\")\n for item in self.items:\n print(\"- {}: ¥{}\".format(item[\"name\"], item[\"price\"]))\n print(\"总价格:¥{}\".format(self.total_price))\n print(\"状态:\", self.status)\n```"} 8 | {"_id": 138, "response": "```python\ndef is_valid_sequence(s):\n # 判断字符串长度是否在 2 到 10 之间\n if len(s) < 2 or len(s) > 10:\n return False\n\n # 判断字符串中的字符是否都是小写字母\n for char in s:\n if not char.islower():\n return False\n\n # 判断字符串中是否出现连续的重复字符\n for i in range(len(s) - 1):\n if s[i] == s[i + 1]:\n return False\n\n # 判断左右两部分是否都是回文字符串或递增序列\n left, right = s[:len(s) // 2], s[len(s) // 2:]\n if left == right[::-1] or sorted(left) == sorted(right):\n return True\n else:\n return False\n```"} 9 | {"_id": 139, "response": "```python\nimport math\n\nclass RouteSimulator:\n def __init__(self, x, y):\n self.x = x\n self.y = y\n self.move_history = []\n\n def move(self, distance, angle_degrees):\n # 将角度转换为弧度\n angle_radians = math.radians(angle_degrees)\n\n # 计算新的位置\n new_x = self.x + distance * math.cos(angle_radians)\n new_y = self.y + distance * math.sin(angle_radians)\n\n # 更新位置\n self.x, self.y = new_x, new_y\n\n # 记录移动历史\n self.move_history.append({'distance': distance, 'angle_degrees': angle_degrees, 'position': (new_x, new_y)})\n\n def calculate_distance(self, x1, y1, x2, y2):\n # 计算两点之间的距离\n return math.sqrt((x2 - x1)**2 + (y2 - y1)**2)\n\n def print_current_position(self):\n print(f\"当前位置:({self.x:.2f}, {self.y:.2f})\")\n\n def print_move_history(self):\n print(\"移动历史:\")\n for move in self.move_history:\n distance = move['distance']\n angle_degrees = move['angle_degrees']\n position = move['position']\n print(f\"距离: {distance:.2f}, 角度: {angle_degrees:.2f}, 位置: ({position[0]:.2f}, {position[1]:.2f})\")\n\n```"} 10 | {"_id": 140, "response": "```python\ndef utf8_to_ascii(utf8_string):\n # 验证输入字符串是否为空\n if not utf8_string:\n print(\"输入字符串不能为空!\")\n return None\n\n result = []\n\n try:\n # 遍历输入字符串的每个字符\n for i in range(len(utf8_string)):\n if utf8_string[i] == '%' and i + 2 < len(utf8_string) and utf8_string[i + 1:i + 3].isalnum():\n # 如果遇到以%开头的16进制字符编码,则进行解析\n hex_str = utf8_string[i + 1:i + 3]\n ascii_code = int(hex_str, 16)\n result.append(ascii_code)\n i += 2\n else:\n # 普通的UTF-8字符直接转换为ASCII码\n ascii_code = ord(utf8_string[i])\n result.append(ascii_code)\n except IndexError:\n print(\"输入字符串格式错误!\")\n return None\n except ValueError:\n print(\"输入字符串包含非法的字符编码!\")\n return None\n except Exception as e:\n print(\"发生未知错误:\", str(e))\n return None\n\n return result\n\n\n\n```"} 11 | {"_id": 141, "response": "```python\nimport re\nimport datetime\n\n\ndef find_dates(text):\n if not text:\n return \"输入的文本为空,请提供有效的文本内容\"\n\n date_patterns = [\n r\"\\b(\\d{4})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])\\b\", # YYYY-MM-DD\n r\"\\b(0[1-9]|1[0-2])/(0[1-9]|[12][0-9]|3[01])/(19|20)?\\d{2}\\b\", # MM/DD/YYYY or M/D/YYYY\n 
r\"\\b(0[1-9]|[12][0-9]|3[01])-(0[1-9]|1[0-2])-(19|20)?\\d{2}\\b\", # DD-MM-YYYY or D-M-YYYY\n r\"\\b(0[1-9]|[12][0-9]|3[01])\\s(January|February|March|April|May|June|July|August|September|October|November|December),?\\s(19|20)?\\d{2}\\b\"\n # DD Month YYYY or D Month YYYY\n ]\n dates = []\n for pattern in date_patterns:\n dates.extend(re.findall(pattern, text))\n\n formatted_dates = []\n for date in dates:\n if date[0]: # 如果匹配到了年份\n year = date[0]\n else:\n year = \"20\" + date[3] if date[3] else \"\" # 如果没有匹配到年份,则根据模式中的分组确定年份\n\n month = date[1].zfill(2)\n day = date[2].zfill(2)\n\n # 检查日期的有效性\n try:\n formatted_date = f\"{year}-{month}-{day}\"\n _ = datetime.datetime.strptime(formatted_date, \"%Y-%m-%d\")\n formatted_dates.append(formatted_date)\n except ValueError:\n pass\n\n if not formatted_dates:\n return \"未找到有效日期\"\n\n return formatted_dates\n\n```"} 12 | {"_id": 142, "response": "```python\nclass Process:\n def __init__(self, pid, code):\n self.pid = pid\n self.code = code\n self.memory = None\n\ndef allocate_memory(memory_map, process):\n if process.memory is not None:\n raise ValueError(\"Process already has memory\")\n\n for i in range(len(memory_map)):\n if memory_map[i] == 0:\n process.memory = i\n for j in range(len(process.code)):\n memory_map[i + j] = 1\n return True\n\n return False\n\ndef deallocate_memory(memory_map, process):\n if process.memory is not None:\n for i in range(len(process.code)):\n memory_map[process.memory + i] = 0\n process.memory = None\n\ndef run_processes(processes):\n for process in processes:\n print(f\"Running Process {process.pid} with code: {process.code}\")\n\n# 创建一个简单的操作系统\nmemory_map = [0] * 100\nprocesses = []\n\n# 创建两个进程\nprocess1 = Process(pid=1, code=[1, 2, 3])\nprocess2 = Process(pid=2, code=[4, 5, 6])\n\n# 将进程添加到列表\nprocesses.append(process1)\nprocesses.append(process2)\n\n# 分配内存\nallocate_memory(memory_map, process1)\nallocate_memory(memory_map, process2)\n\n# 运行进程\nrun_processes(processes)\n\n# 释放内存\ndeallocate_memory(memory_map, process1)\ndeallocate_memory(memory_map, process2)\n\n```"} 13 | {"_id": 143, "response": "```python\nimport re\nfrom collections import Counter\nfrom bs4 import BeautifulSoup\n\n\ndef get_top_k_words_from_html(html, k):\n # 定义需要排除的单词列表\n exclude_list = ['this','is','are','a', 'an', 'the', 'and', 'or', 'in', 'on', 'at', 'to', 'of', 'for', 'with', 'by', 'from']\n\n try:\n # 解析HTML文本\n soup = BeautifulSoup(html, 'lxml')\n\n # 获取所有文本内容\n text = soup.get_text()\n\n # 将文本转换为小写,并使用正则表达式提取单词\n\n lower=text.lower()\n words = re.findall(r'\\b(? 
Tuple[List[str], List[float]]:\n cities = []\n temperatures = []\n lines = text.strip().split('\\n')\n for line in lines:\n city_temperature = line.split(',')\n if len(city_temperature) == 2:\n city, temperature = city_temperature\n temperature = float(temperature.replace('℃', ''))\n cities.append(city)\n temperatures.append(temperature)\n else:\n return [], []\n return cities, temperatures\n\ndef calculate_average_temperature(temperatures: List[float]) -> float:\n total_temperature = sum(temperatures)\n average_temperature = total_temperature / len(temperatures)\n return average_temperature\n\ndef find_highest_temperature(cities: List[str], temperatures: List[float]) -> Tuple[float, str]:\n if not temperatures:\n return 0, \"无\"\n index = temperatures.index(max(temperatures))\n highest_temperature = temperatures[index]\n city = cities[index]\n return highest_temperature, city\n\ndef find_lowest_temperature(cities: List[str], temperatures: List[float]) -> Tuple[float, str]:\n if not temperatures:\n return 0, \"无\"\n index = temperatures.index(min(temperatures))\n lowest_temperature = temperatures[index]\n city = cities[index]\n return lowest_temperature, city\n\n\n```"} 15 | {"_id": 145, "response": "```python\nimport re\n\n\ndef count_html_tags(html):\n pattern = r'<\\s*([a-zA-Z]+)[^>]*>|<\\s*/\\s*([a-zA-Z]+)\\s*>'\n matches = re.findall(pattern, html)\n\n tag_count = 0\n nested_tags = []\n max_nested_depth = 0\n\n for opening_tag, closing_tag in matches:\n if opening_tag:\n nested_tags.append(opening_tag.lower())\n tag_count += 1\n max_nested_depth = max(max_nested_depth, len(nested_tags))\n elif nested_tags and nested_tags[-1] == closing_tag.lower():\n nested_tags.pop()\n tag_count += 1\n\n return tag_count, max_nested_depth\n\n\n\n```"} 16 | {"_id": 146, "response": "```python\nfrom cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes\nfrom cryptography.hazmat.backends import default_backend\nimport base64\n\n\ndef encrypt(text, key):\n backend = default_backend()\n cipher = Cipher(algorithms.AES(key), modes.ECB(), backend=backend)\n encryptor = cipher.encryptor()\n\n # 补全文本长度至AES块大小的倍数\n text = text + ' ' * (16 - len(text) % 16)\n\n # 加密\n ciphertext = encryptor.update(text.encode()) + encryptor.finalize()\n return base64.b64encode(ciphertext).decode()\n\n\ndef decrypt(encrypted_text, key):\n backend = default_backend()\n cipher = Cipher(algorithms.AES(key), modes.ECB(), backend=backend)\n decryptor = cipher.decryptor()\n\n # 解密\n ciphertext = base64.b64decode(encrypted_text.encode())\n decrypted_text = decryptor.update(ciphertext) + decryptor.finalize()\n return decrypted_text.decode().rstrip()\n\n\ndef save_to_file(data, filename):\n with open(filename, 'w') as file:\n file.write(data)\n\n\ndef read_from_file(filename):\n with open(filename, 'r') as file:\n return file.read()\n\n\n\ntext_to_encrypt = \"Hello, World!\"\nencryption_key = b'Sixteen byte key'\n\n# # 加密并保存到文件\n# encrypted_text = encrypt(text_to_encrypt, encryption_key)\n# save_to_file(encrypted_text, \"encrypted_data.txt\")\n#\n# # 从文件中读取并解密\n# loaded_encrypted_text = read_from_file(\"encrypted_data.txt\")\n# decrypted_text = decrypt(loaded_encrypted_text, encryption_key)\n\n\n\n```"} 17 | {"_id": 147, "response": "```python\nimport os\nimport shutil\n\ndef backup_folder(source_folder, target_folder):\n try:\n # 检查源文件夹是否存在\n if not os.path.exists(source_folder):\n raise Exception(\"Source folder does not exist.\")\n\n # 检查目标文件夹是否存在,如果不存在则创建\n if not os.path.exists(target_folder):\n 
os.makedirs(target_folder)\n\n # 遍历源文件夹中的所有文件和子文件夹\n for root, dirs, files in os.walk(source_folder):\n # 构建目标文件夹中的相对路径\n relative_path = os.path.relpath(root, source_folder)\n target_path = os.path.join(target_folder, relative_path)\n\n # 创建目标文件夹中的子文件夹\n if not os.path.exists(target_path):\n os.makedirs(target_path)\n\n # 复制源文件夹中的所有文件到目标文件夹中\n for file in files:\n source_file = os.path.join(root, file)\n target_file = os.path.join(target_path, file)\n shutil.copy2(source_file, target_file)\n\n print(\"Backup completed successfully.\")\n\n except Exception as e:\n print(f\"An error occurred: {e}\")\n\n```"} 18 | {"_id": 148, "response": "```python\nimport os\nimport shutil\n\ndef create_folder(folder_name):\n if not os.path.exists(folder_name):\n os.makedirs(folder_name)\n print(f'Folder {folder_name} created.')\n\ndef create_and_write_file(file_path, content):\n with open(file_path, 'w', encoding='utf-8') as file:\n file.write(content)\n print(f'File {file_path} created and written.')\n\ndef read_and_print_file(file_path):\n with open(file_path, 'r', encoding='utf-8') as file:\n content = file.read()\n print(f'Contents of {file_path}:')\n print(content)\n\ndef copy_file(source_path, destination_path):\n shutil.copy(source_path, destination_path)\n print(f'File {source_path} copied to {destination_path}.')\n\ndef move_file(source_path, destination_path):\n shutil.move(source_path, destination_path)\n print(f'File {source_path} moved to {destination_path}.')\n\ndef delete_folder(folder_name):\n shutil.rmtree(folder_name)\n print(f'Folder {folder_name} and its contents deleted.')\n\n```"} 19 | {"_id": 149, "response": "```python\nimport re\n\n\ndef extract_phone_numbers(text):\n # 电话号码的正则表达式模式\n pattern = re.compile(r'\\(?\\b(\\d{3})\\)?[-.\\s]?(\\d{3})[-.\\s]?(\\d{4})\\b')\n\n # 使用 finditer 方法提取所有匹配的电话号码\n matches = pattern.finditer(text)\n\n # 存储提取到的电话号码\n phone_numbers = []\n\n for match in matches:\n # 获取匹配对象的匹配组\n groups = match.groups()\n\n # 格式化提取到的电话号码\n formatted_number = f\"({groups[0]}) {groups[1]}-{groups[2]}\"\n\n # 添加到结果列表\n phone_numbers.append(formatted_number)\n\n return phone_numbers\n\n\n```"} 20 | {"_id": 150, "response": "```python\nimport numpy as np\nfrom scipy.linalg import eig\n\n\ndef calculate_eigen(matrix, num_largest=5):\n # 计算特征值和特征向量\n eigenvalues, eigenvectors = eig(matrix)\n\n # 找到最大的几个特征值及其对应的索引\n largest_indices = np.argsort(eigenvalues)[-num_largest:]\n\n # 提取最大的几个特征值和对应的特征向量\n largest_eigenvalues = eigenvalues[largest_indices]\n largest_eigenvectors = eigenvectors[:, largest_indices]\n\n return largest_eigenvalues, largest_eigenvectors\n\n```"} 21 | {"_id": 151, "response": "```python\ndef flatten_list(nested_list):\n flattened_list = []\n stack = [nested_list]\n\n while stack:\n current = stack.pop()\n\n for element in current:\n if isinstance(element, list):\n stack.append(element)\n else:\n flattened_list.append(element)\n\n return flattened_list\n\n\n```"} 22 | {"_id": 152, "response": "```python\nimport openpyxl\n# 打开txt文件\n\ndef txt2excle(file_path):\n with open(file_path, 'r') as file:\n lines = file.readlines()\n\n # 创建Excel工作簿\n workbook = openpyxl.Workbook()\n sheet = workbook.active\n\n # 将列表数据写入Excel工作表\n for row, line in enumerate(lines, start=1):\n # 假设列表中的元素使用逗号进行分隔\n elements = line.strip().split('\\t')\n for col, element in enumerate(elements, start=1):\n sheet.cell(row=row, column=col).value = element\n\n # 保存为Excel文件\n workbook.save('output.xlsx')\n\n```"} 23 | {"_id": 153, "response": "```python\nimport xml.etree.ElementTree as ET\n 
\ndef calculate_iou(box1, box2):\n \"\"\"\n 计算两个边界框之间的IoU\n \"\"\"\n x1, y1, x2, y2 = box1\n x3, y3, x4, y4 = box2\n\n # 计算相交矩形的左上角和右下角坐标\n inter_x1 = max(x1, x3)\n inter_y1 = max(y1, y3)\n inter_x2 = min(x2, x4)\n inter_y2 = min(y2, y4)\n\n # 计算相交矩形的面积\n inter_area = max(0, inter_x2 - inter_x1 + 1) * max(0, inter_y2 - inter_y1 + 1)\n\n # 计算并集面积和IoU\n box1_area = (x2 - x1 + 1) * (y2 - y1 + 1)\n box2_area = (x4 - x3 + 1) * (y4 - y3 + 1)\n union_area = box1_area + box2_area - inter_area\n iou = inter_area / union_area\n\n return iou\n\ndef convert_labels(label_file, target_region):\n \"\"\"\n 将在目标区域内与真实框标签的IoU大于0.2的所有框标签划分为类别0\n \"\"\"\n tree = ET.parse(label_file)\n root = tree.getroot()\n\n for object_elem in root.findall('object'):\n bbox = object_elem.find('bndbox')\n xmin = int(bbox.find('xmin').text)\n ymin = int(bbox.find('ymin').text)\n xmax = int(bbox.find('xmax').text)\n ymax = int(bbox.find('ymax').text)\n\n # 计算当前边界框与目标区域的IoU\n iou = calculate_iou(target_region, (xmin, ymin, xmax, ymax))\n\n # 如果IoU大于0.2,则将类别设置为0\n if iou > 0.2:\n object_elem.find('name').text = '0'\n\n tree.write(label_file)\n\n```"} 24 | {"_id": 154, "response": "```python\nimport os\nimport random\nimport shutil\n\n\ndef split_data(dataset_path,train_ratio,test_ratio,val_ratio):\n # 设置随机种子\n random_seed = 42\n random.seed(random_seed)\n train_path = os.path.join(dataset_path,'train')\n test_path = os.path.join(dataset_path,'test')\n val_path = os.path.join(dataset_path,'val')\n\n if not os.path.exists(train_path):\n os.makedirs(train_path)\n\n if not os.path.exists(test_path):\n os.makedirs(test_path)\n\n if not os.path.exists(val_path):\n os.makedirs(val_path)\n\n image_files = [f for f in os.listdir(dataset_path) if f.endswith('.jpg')]\n\n # 随机打乱图像文件列表\n random.shuffle(image_files)\n\n # 计算划分的索引位置\n train_split = int(len(image_files) * train_ratio)\n test_split = int(len(image_files) * (train_ratio + test_ratio))\n\n # 划分训练集\n train_files = image_files[:train_split]\n for file in train_files:\n src_path = os.path.join(dataset_path, file)\n dest_path = os.path.join(train_path, file)\n shutil.copy(src_path, dest_path)\n\n # 划分测试集\n test_files = image_files[train_split:test_split]\n for file in test_files:\n src_path = os.path.join(dataset_path, file)\n dest_path = os.path.join(test_path, file)\n shutil.copy(src_path, dest_path)\n\n # 划分验证集\n val_files = image_files[test_split:]\n for file in val_files:\n src_path = os.path.join(dataset_path, file)\n dest_path = os.path.join(val_path, file)\n shutil.copy(src_path, dest_path)\n\n\n# import os\n# import shutil\n# from sklearn.model_selection import train_test_split\n\n\n# def split_data(dataset_path, train_ratio, test_ratio, val_ratio):\n# # 获取所有文件的路径\n# files = [os.path.join(dataset_path, f) for f in os.listdir(dataset_path) if\n# os.path.isfile(os.path.join(dataset_path, f))]\n#\n# # 划分训练集和其余部分\n# train_files, rest_files = train_test_split(files, train_size=train_ratio, random_state=42)\n#\n# # 计算测试集的比例\n# test_ratio_adjusted = test_ratio / (1 - train_ratio)\n#\n# # 划分测试集和验证集\n# test_files, val_files = train_test_split(rest_files, test_size=test_ratio_adjusted, random_state=42)\n#\n# # 创建训练集、测试集和验证集的文件夹\n# train_dir = os.path.join(dataset_path, 'train')\n# test_dir = os.path.join(dataset_path, 'test')\n# val_dir = os.path.join(dataset_path, 'val')\n# os.makedirs(train_dir, exist_ok=True)\n# os.makedirs(test_dir, exist_ok=True)\n# os.makedirs(val_dir, exist_ok=True)\n#\n# # 将文件移动到对应的文件夹\n# for f in train_files:\n# shutil.move(f, os.path.join(train_dir, 
os.path.basename(f)))\n# for f in test_files:\n# shutil.move(f, os.path.join(test_dir, os.path.basename(f)))\n# for f in val_files:\n# shutil.move(f, os.path.join(val_dir, os.path.basename(f)))\n```"} 25 | {"_id": 155, "response": "```python\nimport numpy as np\n\ndef hinge_loss(y_true, y_pred):\n return np.mean(np.maximum(0, 1 - y_true * y_pred))\n\n```"} 26 | {"_id": 156, "response": "```python\nimport os\n\n\ndef remove_empty_lines_folder(folder_path):\n for file_name in os.listdir(folder_path):\n if file_name.endswith('.txt'):\n file_path = os.path.join(folder_path, file_name)\n remove_empty_lines(file_path)\n\n\ndef remove_empty_lines(file_path):\n with open(file_path, 'r+') as file:\n lines = file.readlines()\n file.seek(0)\n file.truncate()\n for line in lines:\n if line.strip(): # 检查是否为空行\n file.write(line)\n\n# folder_path=r'C:\\Users\\Administrator\\Desktop\\work\\1207-Q17\\yolo\\labels'\n# remove_empty_lines_folder(folder_path)\n\n```"} 27 | {"_id": 157, "response": "```python\nimport os\n\n\ndef label_filter(folder_path,Label_category):\n\n txt_files = [f for f in os.listdir(folder_path) if f.endswith('.txt')]\n for file_name in txt_files:\n file_path = os.path.join(folder_path, file_name)\n with open(file_path, 'r') as file:\n lines = file.readlines()\n\n new_lines = []\n\n\n for line in lines:\n parts = line.split()\n\n if int(parts[0]) in Label_category :\n continue\n new_line = ' '.join(parts) + '\\n'\n new_lines.append(new_line)\n\n # 将处理后的行写回文件\n with open(file_path, 'w') as file:\n file.writelines(new_lines)\n\n```"} 28 | {"_id": 158, "response": "```python\nimport os\n\n\ndef w_h_filter(folder_path,limit_wideth,limit_height):\n txt_files = [f for f in os.listdir(folder_path) if f.endswith('.txt')]\n for file_name in txt_files:\n file_path = os.path.join(folder_path, file_name)\n with open(file_path, 'r') as file:\n lines = file.readlines()\n\n new_lines = []\n\n\n for line in lines:\n parts = line.split()\n\n if (float(parts[3]) < limit_wideth or float(parts[4]) < limit_height):\n continue\n new_line = ' '.join(parts) + '\\n'\n new_lines.append(new_line)\n\n # 将处理后的行写回文件\n with open(file_path, 'w') as file:\n file.writelines(new_lines)\n\n\n\n\n# if __name__ == \"__main__\":\n#\n# folder_path = r\"C:\\Users\\Administrator\\Desktop\\work\\1207-Q14\\labels\"\n# limit_wideth=0.1\n# limit_height=0.02\n# w_h_filter(folder_path,limit_wideth,limit_height)\n\n```"} 29 | {"_id": 159, "response": "```python\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\n\ndef mean_squared_error(y_true, y_pred):\n return np.mean(np.square(y_true - y_pred))\n```"} 30 | {"_id": 160, "response": "```python\nimport nltk\nnltk.data.path.append('nltk_package')\nfrom nltk.tokenize import word_tokenize\nfrom nltk.stem import PorterStemmer\nfrom nltk import pos_tag\n\n\ndef process_sentence(sentence):\n tokens = word_tokenize(sentence)\n pos_tags = pos_tag(tokens)\n # stop_words = set(stopwords.words('english'))\n # filtered_tokens = [word.lower() for word in tokens if word.lower() not in stop_words]\n filtered_tokens = [word.lower() for word in tokens]\n stemmer = PorterStemmer()\n stemmed_tokens = [stemmer.stem(word) for word in filtered_tokens]\n # 输出结果\n print(\"Original Sentence:\\n\", sentence)\n print(\"\\nTokenized Sentence:\\n\", tokens)\n print(\"\\nPOS Tagged Sentence:\\n\", pos_tags)\n print(\"\\nSentence without Stopwords:\\n\", filtered_tokens)\n print(\"\\nStemmed Sentence:\\n\", stemmed_tokens)\n return tokens, pos_tags, filtered_tokens, 
stemmed_tokens\n\nprint(process_sentence(\"!@#$%^&*()\"))\n\n```"} 31 | {"_id": 161, "response": "```python\nimport pandas as pd\nfrom sklearn.preprocessing import OneHotEncoder\n\n\ndef calculate_and_save_probabilities(input_file, output_file):\n # 读取数据集\n df = pd.read_csv(input_file)\n\n # 计算概率\n probabilities = {}\n for column_value in df['Sex'].unique():\n prob = df[(df['Sex'] == column_value) & (df[\"Survived\"] == 1)].shape[0] / df[df['Sex'] == column_value].shape[0]\n probabilities[f\"Probability({'Sex'}={column_value}, {'Survived'}=1)\"] = prob\n\n # 打印计算结果\n for key, value in probabilities.items():\n print(key + \":\", value)\n\n # 进行独热编码\n non_numeric_columns = df.select_dtypes(include=['object']).columns\n encoder = OneHotEncoder(sparse=False)\n encoded_features = pd.DataFrame(encoder.fit_transform(df[non_numeric_columns]))\n encoded_features.columns = encoder.get_feature_names_out(non_numeric_columns)\n\n # 替换原始数据集中的非数值列\n df.drop(non_numeric_columns, axis=1, inplace=True)\n df = pd.concat([df, encoded_features], axis=1)\n\n # 将数据集保存到新文件\n df.to_csv(output_file, index=False)\n\n return probabilities[f\"Probability(Sex=male, Survived=1)\"], probabilities[\"Probability(Sex=female, Survived=1)\"]\n\n\nprint(calculate_and_save_probabilities(\"test6/dataset.csv\", \"test6/test_label.csv\"))\n```"} 32 | {"_id": 162, "response": "```python\nimport numpy as np\n\n\ndef linear_fit(x, y):\n x = np.array(x)\n y = np.array(y)\n\n mean_x = np.mean(x)\n mean_y = np.mean(y)\n\n SS_xy = np.dot(x - mean_x, y - mean_y)\n SS_xx = np.dot(x - mean_x, x - mean_x)\n a = SS_xy / SS_xx\n b = mean_y - a * mean_x\n\n r = np.corrcoef(x, y)[0, 1]\n\n return a, b, r\n\n\n```"} 33 | {"_id": 163, "response": "```python\nimport os\n\ndef list_files(directory_path):\n # 检查目录是否存在\n if not os.path.exists(directory_path) or not os.path.isdir(directory_path):\n print(\"Invalid directory path.\")\n return\n\n list_files_recursive(directory_path)\n\ndef list_files_recursive(directory):\n # 获取目录中的所有文件和子目录\n files = os.listdir(directory)\n for file in files:\n file_path = os.path.join(directory, file)\n if os.path.isfile(file_path):\n # 如果是文件,则输出文件名\n print(file)\n elif os.path.isdir(file_path):\n # 如果是目录,则输出目录名,并递归调用列出子目录\n list_files_recursive(file_path)\n\n```"} 34 | {"_id": 164, "response": "```python\nimport os\n\ndef find_file(file_name, directory_path):\n exist = False\n # 使用os.walk遍历指定目录及其子目录中的文件\n for root, dirs, files in os.walk(directory_path):\n for file in files:\n if file == file_name:\n exist = True\n # 如果找到文件,输出文件的绝对路径\n file_path = os.path.join(root, file)\n print(file_path)\n \n if not exist:\n print(\"The file is not on disk\")\n\n```"} 35 | {"_id": 165, "response": "```python\nimport csv\n\ndef calculate_average(filename, column_index):\n with open(filename, 'r') as file:\n reader = csv.reader(file)\n next(reader) \n total, count = 0, 0\n for row in reader:\n if row[column_index].isdigit():\n total += int(row[column_index])\n count += 1\n return total / count if count != 0 else 0\n\n\n```"} 36 | {"_id": 166, "response": "```python\nimport os\nimport pandas as pd\n\n\ndef find_max_temperature(folder_path, output_file):\n csv_files = [f for f in os.listdir(folder_path) if f.endswith('.csv')]\n \n result_df = pd.DataFrame(columns=['City', 'Max_Temperature', 'Date'])\n \n for csv_file in csv_files:\n file_path = os.path.join(folder_path, csv_file)\n df = pd.read_csv(file_path)\n \n city_name = csv_file[:-4]\n \n # 找到最高气温的值\n max_temp = df['Temperature'].max()\n \n # 找到所有最高气温的日期\n max_temp_dates = 
df.loc[df['Temperature'] == max_temp, 'Date'].tolist()\n \n # 将结果添加到result_df\n for date in max_temp_dates:\n result_df = result_df._append({'City': city_name,\n 'Max_Temperature': max_temp,\n 'Date': date}, ignore_index=True)\n \n result_df.to_csv(output_file, index=False)\n\n```"} 37 | {"_id": 167, "response": "```python\nimport pandas as pd\nimport torch\n\ndef merge_and_smooth_data(filenames, window_size):\n dfs = [pd.read_csv(filename) for filename in filenames]\n merged_df = pd.concat(dfs).sort_values(by='timestamp')\n\n tensor = torch.tensor(merged_df.drop(columns=['timestamp']).values)\n\n cumsum = tensor.cumsum(dim=0)\n sliding_avg = (cumsum[window_size:] - cumsum[:-window_size]) / float(window_size)\n return sliding_avg\n\n\n```"} 38 | {"_id": 168, "response": "```python\nfrom PIL import Image\nimport os\n\n\ndef resize_and_rename_images(input_folder, output_folder, target_resolution=(800, 600)):\n if not os.path.exists(output_folder):\n os.makedirs(output_folder)\n image_files = [f for f in os.listdir(input_folder) if f.endswith(('.png', '.jpg', '.jpeg'))]\n image_files.sort() # 按文件名排序\n for i, image_file in enumerate(image_files):\n input_path = os.path.join(input_folder, image_file)\n output_path = os.path.join(output_folder, f\"{i + 1:03d}.jpg\") # 格式化文件名\n\n # 打开图片\n img = Image.open(input_path)\n\n # 转换为RGB模式\n img = img.convert(\"RGB\")\n\n # 统一分辨率\n img = img.resize(target_resolution, Image.LANCZOS)\n\n # 保存图片\n img.save(output_path)\n\n print(\"Modified images resolutions:\")\n print_resolutions(output_folder)\n return output_folder\n\n\ndef print_resolutions(folder_path):\n image_files = [f for f in os.listdir(folder_path) if f.endswith(('.png', '.jpg', '.jpeg'))]\n image_files.sort()\n\n for image_file in image_files:\n image_path = os.path.join(folder_path, image_file)\n img = Image.open(image_path)\n resolution = img.size\n print(f\"{image_file}: {resolution[0]} x {resolution[1]}\")\n\n\nresize_and_rename_images('test6/input', 'test6/output', (2560, 1440))\n```"} 39 | {"_id": 169, "response": "```python\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nclass FocalLoss(nn.Module):\n def __init__(self, alpha=0.25, gamma=2.0):\n super(FocalLoss, self).__init__()\n self.alpha = alpha\n self.gamma = gamma\n\n def forward(self, inputs, targets):\n \n BCE_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction='none')\n pt = torch.exp(-BCE_loss) \n focal_loss = self.alpha * (1-pt)**self.gamma * BCE_loss\n return focal_loss.mean()\n\n\ncriterion = FocalLoss(alpha=0.25, gamma=2.0)\ninputs = torch.tensor([0.1, 0.2, 0.4, 0.8, 1.2, -0.1, -0.2, -0.4, -0.8, -1.2], requires_grad=True)\ntargets = torch.tensor([1., 0., 1., 0., 1., 0., 1., 0., 1., 0.])\n\nloss = criterion(inputs, targets).detach()\nprint(loss)\n\n\n```"} 40 | {"_id": 170, "response": "```python\nimport os\nimport shutil\n\n\ndef copy_and_move_file(source_path, destination_directory):\n # 获取源文件的文件名\n file_name = os.path.basename(source_path)\n\n # 构建目标路径\n destination_path = os.path.join(destination_directory, file_name)\n\n # 检查源文件是否存在\n if os.path.exists(source_path):\n # 复制文件\n shutil.copy(source_path, destination_path)\n else:\n print(\"File not exist.\")\n\n```"} 41 | {"_id": 171, "response": "```python\nfrom docx import Document\n\n\ndef count_keywords_in_docx(docx_path, keywords):\n doc = Document(docx_path)\n text = \" \".join([paragraph.text for paragraph in doc.paragraphs])\n\n keyword_counts = {}\n for keyword in keywords:\n count = text.lower().count(keyword.lower())\n 
keyword_counts[keyword] = count\n\n return keyword_counts\n\n```"} 42 | {"_id": 172, "response": "```python\nimport shutil\nimport os\n\ndef delete_folder(folder_path):\n try:\n if os.path.exists(folder_path):\n if os.path.isdir(folder_path):\n shutil.rmtree(folder_path)\n print(f\"Successfully delete the folder.\")\n else:\n print(f\"{folder_path} is not a directory\")\n else:\n print(f\"Path not exist: {folder_path}\")\n except Exception as e:\n print(f\"Can't delete the folder.\")\n\n```"} 43 | {"_id": 173, "response": "```python\nimport numpy as np\nimport scipy.sparse as sp\n\n\ndef csr_to_numpy(input_file, output_file):\n input_arr_csr = sp.load_npz(input_file)\n input_arr = input_arr_csr.toarray()\n np.save(output_file, input_arr)\n\n```"} 44 | {"_id": 174, "response": "```python\nimport shutil\nimport os\nimport threading\nimport time\n\n\ndef copy_folder(source_folder, dest_folders):\n for dest_folder in dest_folders:\n try:\n if os.path.exists(dest_folder):\n shutil.rmtree(dest_folder)\n shutil.copytree(source_folder, dest_folder)\n print(\"success from {source_folder} to {dest_folder}\")\n except Exception as e:\n print(\"copy {source_folder} to {dest_folder} is err:{e}\")\ndef create_thread(source_folder, dest_folders):\n # 计算每个线程需要拷贝的目标文件夹数量\n threads_num = len(dest_folders)\n folders_per_thread = len(dest_folders) // threads_num\n\n threads = []\n for i in range(threads_num):\n start = i * folders_per_thread\n end = start + folders_per_thread\n if i == threads_num - 1:\n end = len(dest_folders)\n thread = threading.Thread(target=copy_folder, args=(source_folder, dest_folders[start:end]))\n threads.append(thread)\n \n return threads\n \ndef run(threads):\n # 记录开始时间\n start_time = time.time()\n # 启动所有线程\n for thread in threads:\n thread.start()\n # 等待所有线程结束\n for thread in threads:\n thread.join()\n # 记录结束时间并输出用时\n end_time = time.time()\n print(f\"time: {end_time - start_time} 秒\")\n \n \ndef multithread_copy(source_folder, dest_folders):\n threads = create_thread(source_folder, dest_folders)\n run(threads)\n\n```"} 45 | {"_id": 175, "response": "```python\nimport numpy as np\nimport scipy.sparse as sp\n\ndef numpy_to_csr(input_file, output_file):\n input_arr = np.load(input_file)\n input_arr_csr = sp.csr_matrix(input_arr)\n sp.save_npz(output_file, input_arr_csr)\n```"} 46 | {"_id": 176, "response": "```python\nimport numpy as np\nfrom sklearn.metrics.pairwise import cosine_similarity\n\n\ndef calculate_similarity(file_A, file_B, target_file):\n A = np.load(file_A)\n B = np.load(file_B)\n cos_sim = cosine_similarity(A, B)\n np.save(target_file, cos_sim)\n\n\n```"} 47 | {"_id": 177, "response": "```python\nimport torch\nimport numpy as np\n\n\ndef process_tensor(array1, array2):\n tensor1 = torch.FloatTensor(array1)\n tensor2 = torch.FloatTensor(array2)\n tensor2 = tensor2.permute(0, 2, 1)\n attention_scores = torch.bmm(tensor1, tensor2)\n with open('result.txt', 'w') as file:\n file.write(str(attention_scores))\n return attention_scores\n\n\narray1 = np.ones((100, 130, 80))\narray2 = np.ones((100, 130, 80))\nprint(process_tensor(array1, array2).shape)\n\n\n```"} 48 | {"_id": 178, "response": "```python\nimport math\n\ndef euclidean_distance(point1, point2):\n return math.sqrt(sum((p - q) ** 2 for p, q in zip(point1, point2)))\n\ndef manhattan_distance(point1, point2):\n return sum(abs(p - q) for p, q in zip(point1, point2))\n\ndef chebyshev_distance(point1, point2):\n return max(abs(p - q) for p, q in zip(point1, point2))\n\ndef minkowski_distance(point1, point2, p):\n return 
math.pow(sum(abs(p - q) ** p for p, q in zip(point1, point2)), 1/p)\n\ndef cosine_similarity(vector1, vector2):\n dot_product = sum(p * q for p, q in zip(vector1, vector2))\n norm_vector1 = math.sqrt(sum(p ** 2 for p in vector1))\n norm_vector2 = math.sqrt(sum(q ** 2 for q in vector2))\n return dot_product / (norm_vector1 * norm_vector2)\n\n\ndef hamming_distance(string1, string2):\n return sum(p != q for p, q in zip(string1, string2))\n\ndef jaccard_similarity(set1, set2):\n intersection_size = len(set1.intersection(set2))\n union_size = len(set1.union(set2))\n return intersection_size / union_size\n\n\n\n\n```"} 49 | {"_id": 179, "response": "```python\nimport numpy as np\n\ndef diag_zero(matrix):\n matrix[np.triu_indices(matrix.shape[0], k=1)] = 0\n sum_remaining_values = np.sum(matrix)\n return sum_remaining_values\n\nmatrix = np.arange(1, 10).reshape(3, 3)\nprint(diag_zero(matrix))\n\n\n```"} 50 | {"_id": 180, "response": "```python\nimport numpy as np\n\n\ndef array_avg(matrix):\n # 计算数组元素的平均值\n mean_value = np.mean(matrix)\n\n # 大于平均值的元素置为零\n matrix[matrix > mean_value] = 0\n\n # 将数组展开为一维列表\n flattened_array = matrix.flatten()\n return mean_value, flattened_array\n\nmatrix = [[2,1,4,3],[4,5,8,9],[10,2,3,4],[1,14,21,2]]\nmatrix = np.array(matrix)\nprint(array_avg(matrix))\n\n```"} 51 | {"_id": 181, "response": "```python\nimport os\n\ndef count_files(dir):\n count = 0\n for root, dirs, files in os.walk(dir):\n count += len(files)\n return count\n\n```"} 52 | {"_id": 182, "response": "```python\nimport pandas as pd\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.decomposition import PCA\n\ndef pac_fit(input_file):\n characters = pd.read_csv(input_file)\n characters = StandardScaler().fit_transform(characters)\n pca = PCA().fit(characters)\n return pca.explained_variance_ratio_\n\n\ndf = pd.DataFrame({'A': [1, 2], 'B': [2, 3], 'C': [3, 4]})\ndf.to_csv('test.csv', index=False)\nprint(pac_fit('test.csv'))\n```"} 53 | {"_id": 183, "response": "```python\nimport os\nimport shutil\nimport zipfile\nfrom datetime import datetime\nimport glob\n\ndef backup(source_path, backup_folder, max_size_mb=10):\n # 创建备份文件夹\n if not os.path.exists(backup_folder):\n os.makedirs(backup_folder)\n\n # 生成时间戳\n timestamp = datetime.now().strftime('%Y%m%d%H%M%S')\n\n # 构建备份文件名\n backup_filename = f\"backup_{timestamp}.zip\"\n backup_path = os.path.join(backup_folder, backup_filename)\n\n # 备份文件夹\n shutil.make_archive(backup_path[:-4], 'zip', source_path)\n\n # 获取备份文件夹总大小\n total_size_mb = sum(os.path.getsize(f) for f in glob.glob(os.path.join(backup_folder, '*.zip'))) / (1024 ** 2)\n\n # 如果总大小超过设定的阈值,删除最旧的备份文件\n while total_size_mb > max_size_mb:\n oldest_backup = min(glob.glob(os.path.join(backup_folder, '*.zip')), key=os.path.getctime)\n total_size_mb -= os.path.getsize(oldest_backup) / (1024 ** 2)\n os.remove(oldest_backup)\n\n print(f\"Backup completed: {backup_path}\")\n```"} 54 | {"_id": 184, "response": "```python\nimport torch\n\ndef bbox_iou(boxes1, boxes2):\n \"\"\"\n 计算两组对应位置bounding boxes的IoU。\n \n 参数:\n boxes1, boxes2: 维度为[N, 4]的tensor,表示bounding boxes,格式为[x, y, w, h]。\n \n 返回:\n 一个长度为N的tensor,表示boxes1和boxes2中对应box的IoU。\n \"\"\"\n assert boxes1.shape == boxes2.shape\n boxes1_x1 = boxes1[:, 0] - boxes1[:, 2] / 2\n boxes1_y1 = boxes1[:, 1] - boxes1[:, 3] / 2\n boxes1_x2 = boxes1[:, 0] + boxes1[:, 2] / 2\n boxes1_y2 = boxes1[:, 1] + boxes1[:, 3] / 2\n\n boxes2_x1 = boxes2[:, 0] - boxes2[:, 2] / 2\n boxes2_y1 = boxes2[:, 1] - boxes2[:, 3] / 2\n boxes2_x2 = boxes2[:, 0] + boxes2[:, 2] / 
2\n boxes2_y2 = boxes2[:, 1] + boxes2[:, 3] / 2\n\n\n inter_x1 = torch.max(boxes1_x1, boxes2_x1)\n inter_y1 = torch.max(boxes1_y1, boxes2_y1)\n inter_x2 = torch.min(boxes1_x2, boxes2_x2)\n inter_y2 = torch.min(boxes1_y2, boxes2_y2)\n\n inter_area = torch.clamp(inter_x2 - inter_x1, min=0) * torch.clamp(inter_y2 - inter_y1, min=0)\n\n\n boxes1_area = (boxes1_x2 - boxes1_x1) * (boxes1_y2 - boxes1_y1)\n boxes2_area = (boxes2_x2 - boxes2_x1) * (boxes2_y2 - boxes2_y1)\n\n\n union_area = boxes1_area + boxes2_area - inter_area\n\n\n iou = inter_area / union_area\n\n return iou\n\n\n\n\n```"} 55 | {"_id": 185, "response": "```python\nimport re\nfrom collections import defaultdict\nimport json\n\ndef nginx_log_analysis(nginx_log_file, statistics_output_file, high_frequency_output_file):\n # 定义一个字典用于存储IP地址访问次数\n ip_count = defaultdict(int)\n\n # 定义一个集合用于存储高频率访问的IP地址\n high_frequency_ips = defaultdict(int)\n \n # 定义正则表达式来匹配IP地址\n ip_pattern = re.compile(r'\\b(?:[0-9]{1,3}\\.){3}[0-9]{1,3}\\b')\n \n # 打开Nginx日志文件并逐行处理\n with open(nginx_log_file, 'r') as file:\n for line in file:\n # 使用正则表达式匹配IP地址\n match = ip_pattern.search(line)\n if match:\n ip_address = match.group(0)\n \n # 增加IP地址访问次数\n ip_count[ip_address] += 1\n \n # 判断是否是高频率访问\n if ip_count[ip_address] > 30:\n high_frequency_ips[ip_address] = ip_count[ip_address]\n\n print(ip_count)\n # 输出IP地址及其访问次数到统计文件\n with open(statistics_output_file, 'w') as output:\n json.dump(ip_count, output, ensure_ascii=False, indent=2)\n \n # 输出高频率访问的IP地址到单独的文件\n with open(high_frequency_output_file, 'w') as output:\n json.dump(high_frequency_ips, output, ensure_ascii=False, indent=2)\n\n```"} 56 | {"_id": 186, "response": "```python\ndef lengthLongestPath(input: str) -> int:\n st = []\n ans, i, n = 0, 0, len(input)\n while i < n:\n # 检测当前文件的深度\n depth = 1\n while i < n and input[i] == '\\t':\n depth += 1\n i += 1\n # 统计当前文件名的长度\n length, isFile = 0, False\n while i < n and input[i] != '\\n':\n if input[i] == '.':\n isFile = True\n length += 1\n i += 1\n i += 1 # 跳过换行符\n while len(st) >= depth:\n st.pop()\n if st:\n length += st[-1] + 1\n if isFile:\n ans = max(ans, length)\n else:\n st.append(length)\n return ans\n\n\n```"} 57 | {"_id": 187, "response": "```python\nfrom sklearn.cluster import KMeans\nimport numpy as np\n\ndef k_means_clustering(data, num_clusters):\n kmeans = KMeans(n_clusters=num_clusters, random_state=42)\n kmeans.fit(data)\n return kmeans.labels_\n\n```"} 58 | {"_id": 188, "response": "```python\nimport cv2\nimport numpy as np\n\ndef gamma_correction(image, gamma):\n # 将图像的亮度值转换为范围在[0, 1]之间的浮点数\n image = image / 255.0\n \n # 对图像进行伽马亮度矫正\n corrected_image = np.power(image, gamma)\n \n # 将亮度值重新缩放到范围[0, 255]\n corrected_image = np.uint8(corrected_image * 255)\n \n return corrected_image\n```"} 59 | {"_id": 189, "response": "```python\n\nimport os\nfrom PyPDF2 import PdfMerger\n\n# 定义一个函数来合并PDF文件\ndef merge_pdfs(root_dir):\n def merge(files, output):\n merger = PdfMerger()\n for pdf in files:\n merger.append(pdf)\n merger.write(output)\n merger.close()\n\n # 遍历文件夹\n for root, dirs, files in os.walk(root_dir):\n pdf_files = [f for f in files if f.endswith(\".pdf\")]\n if pdf_files: # 如果存在PDF文件\n output = os.path.join(root, \"merged.pdf\") # 输出文件名\n full_pdf_files = [os.path.join(root, f) for f in pdf_files] # 获取完整路径\n full_pdf_files = sorted(full_pdf_files)\n merge(full_pdf_files, output) # 合并PDF文件\n\n\n\n\n\n\n\n```"} 60 | {"_id": 190, "response": "```python\n\nimport pandas as pd\nfrom io import StringIO\n\ndef 
calculate_date_difference(csv_contents):\n # Read the CSV contents into a pandas DataFrame\n df = pd.read_csv(StringIO(csv_contents))\n\n # Convert the 'Date' column to datetime\n df['Date'] = pd.to_datetime(df['Date'])\n\n # Calculate the difference in days between the earliest and latest date\n date_difference = (df['Date'].max() - df['Date'].min()).days\n\n return date_difference\n\n\n```"} 61 | {"_id": 191, "response": "```python\n\nimport os\nimport shutil\n\ndef copy_files(file_path, dest_path, pairs_per_folder=10):\n files = os.listdir(file_path)\n files.sort()\n pairs = zip(files[::2], files[1::2])\n for i, pair in enumerate(pairs):\n sub_folder = os.path.join(dest_path, 'sub{}'.format(i // pairs_per_folder))\n os.makedirs(sub_folder, exist_ok=True)\n for file in pair:\n shutil.copy(os.path.join(file_path, file), sub_folder)\n\n# 使用方法\n# copy_files('/path/to/source/folder', '/path/to/destination/folder')\n\n\n```"} 62 | {"_id": 192, "response": "```python\n\nimport re\n\ndef replace_code_block(text):\n # 定义一个字典,将代码块标记映射到相应的HTML类\n languages = {\n \"html\": \"language-html\",\n \"css\": \"language-css\",\n \"python\": \"language-python\",\n \"javascript\": \"language-javascript\",\n \"golang\": \"language-golang\"\n }\n\n # 对于每种语言,使用正则表达式查找和替换相应的代码块\n for lang, html_class in languages.items():\n # 构建正则表达式模式\n pattern = rf\"```{lang}\\b\\s*(.*?)\\s*```\"\n # 替换文本\n replacement = rf'\\1'\n # 应用替换\n text = re.sub(pattern, replacement, text, flags=re.DOTALL)\n\n return text\n\n\n\n```"}
63 | {"_id": 193, "response": "```python\n\ndef calculate_et(delta, rn, g, gamma, t, u2, es, ea):\n numerator = 0.408 * delta * (rn - g) + gamma * (900 / (t + 273)) * u2 * (es - ea)\n denominator = delta + gamma * (1 + 0.34 * u2)\n if denominator == 0:\n return 0\n et = numerator / denominator\n\n print(et)\n return et\n\n\n```"}
64 | {"_id": 194, "response": "```python\n\nfrom lxml import etree\n\nhtml = \"\"\"\n\n 3131\n AAAA\n BBB\n CCC\n
\n\"\"\"\n\nroot = etree.fromstring(html)\n\ndef has_strong_or_em(tag):\n if tag.find('strong') is not None or tag.find('em') is not None:\n return True\n return False\n\nfor p in root.iter('p'):\n if has_strong_or_em(p):\n print(\"P tag has strong or em tag\")\n else:\n print(\"P tag does not have strong or em tag\")\n\n\n```"} 65 | {"_id": 195, "response": "```python\n\nimport pandas as pd\n\ndef flatten_dict_to_rows(d):\n def flatten_dict(d, parent_key='', sep='.'):\n items = []\n for k, v in d.items():\n new_key = f\"{parent_key}{sep}{k}\" if parent_key else k\n if isinstance(v, dict):\n if not v:\n items.append((new_key, {}))\n else:\n items.extend(flatten_dict(v, new_key, sep=sep).items())\n else:\n items.append((new_key, v))\n return dict(items)\n\n flat_dict = flatten_dict(d)\n\n # Convert the flattened dictionary to a pandas DataFrame\n df = pd.DataFrame(list(flat_dict.items()), columns=['Key', 'Value'])\n print(df.to_dict())\n return df\n\n\n```"} 66 | {"_id": 196, "response": "```python\n\ndef is_mac_in_range(mac, mac_range):\n def mac_to_hex(mac_str):\n return mac_str.replace(':', '').replace('-', '').upper()\n\n # 解析MAC地址范围\n start_mac_str, end_mac_str = mac_range.split('-')\n start_mac = int(mac_to_hex(start_mac_str), 16)\n end_mac = int(mac_to_hex(end_mac_str), 16)\n mac_val = int(mac_to_hex(mac), 16)\n\n # 判断MAC地址是否在范围内\n return start_mac <= mac_val <= end_mac\n\n\n```"} 67 | {"_id": 197, "response": "```python\n\nimport numpy as np\nfrom scipy.optimize import curve_fit\n\n# Define the fractional order model with 2 constant phase elements and 3 resistors\ndef fractional_order_model(t, Q, R1, R2, R3, n1, n2):\n return Q * (1 - (1 / (R1 * (t ** n1) + 1)) - (1 / (R2 * (t ** n2) + 1)) - (1 / R3))\n\n# # Sample time and current data\n# time_data = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])\n# current_data = np.array([0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0])\n#\n# # Fit the model to the data\n# popt, pcov = curve_fit(fractional_order_model, time_data, current_data, bounds=(0, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]))\n# print(len(popt))\n# # The estimated battery capacity in Amp hour is the first parameter of the model\n# estimated_capacity_Ah = popt[0]\n#\n# print(\"Estimated battery capacity: {:.2f} Ah\".format(estimated_capacity_Ah))\n\n\n```"} 68 | {"_id": 198, "response": "```python\n\nimport numpy as np\n\ndef solve_quadratic(points):\n # Create matrix A using a list comprehension\n A = np.array([[x**2, x, 1] for x, y in points])\n # Create vector b\n b = np.array([y for x, y in points])\n # Use numpy.linalg.solve to solve for [a, b, c]\n coefficients = np.linalg.solve(A, b)\n\n return coefficients\n\n```"} 69 | {"_id": 199, "response": "```python\n\nimport ipaddress\n\ndef check_ip_overlap(ip_range1, ip_range2):\n ip_net1 = ipaddress.ip_network(ip_range1)\n ip_net2 = ipaddress.ip_network(ip_range2)\n\n return ip_net1.overlaps(ip_net2)\n\n\n\n```"} 70 | {"_id": 200, "response": "```python\n\nimport os\nimport re\n\ndef rename_files_in_folder(folder_path):\n for filename in os.listdir(folder_path):\n file_type = filename.split('.')[-1]\n new_filename = re.sub(\"[A-Za-z]\", \"\", filename[:-len(file_type)]) + file_type\n os.rename(os.path.join(folder_path, filename), os.path.join(folder_path, new_filename))\n\n# Call the function with the path to the folder\n# rename_files_in_folder('/path/to/your/folder')\n\n\n```"} 71 | --------------------------------------------------------------------------------