├── requirements.txt
├── .github
│   └── workflows
│       └── publish_pypi.yml
├── setup.py
├── LICENSE
├── .gitignore
├── README.md
└── dc_api.py

/requirements.txt:
--------------------------------------------------------------------------------
lxml
aiohttp
tenacity
filetype

--------------------------------------------------------------------------------
/.github/workflows/publish_pypi.yml:
--------------------------------------------------------------------------------
name: Publish Python 🐍 distributions 📦 to PyPI and TestPyPI

on: push

jobs:
  build-n-publish:
    name: Build and publish Python 🐍 distributions 📦 to PyPI and TestPyPI
    runs-on: ubuntu-18.04
    steps:
      - uses: actions/checkout@master
      - name: Set up Python 3.9
        uses: actions/setup-python@v1
        with:
          python-version: 3.9
      - name: Install pypa/build
        run: >-
          python -m
          pip install
          build
          --user
      - name: Build a binary wheel and a source tarball
        run: >-
          python -m
          build
          --sdist
          --wheel
          --outdir dist/
          .
      - name: Publish distribution 📦 to PyPI
        if: startsWith(github.ref, 'refs/tags')
        uses: pypa/gh-action-pypi-publish@master
        with:
          password: ${{ secrets.PYPI_API_TOKEN }}

--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
import setuptools

try:
    with open('README.md', 'rt', encoding='UTF8') as fh:
        long_description = fh.read()
except IOError:
    long_description = ""

setuptools.setup(
    name="dc_api",
    version="0.8.1",
    author="Eunchul, Song",
    author_email="eunchulsong9@gmail.com",
    description="Dead-simple unofficial dcinside API",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/song9446/dcinside-python3-api",
    #packages=setuptools.find_packages(),
    py_modules=['dc_api'],
    install_requires=[
        'lxml',
        'aiohttp',
        'tenacity',
        'filetype',  # used by Image.download to pick the file extension
    ],
    entry_points = """
        [console_scripts]
        dc_api = dc_api:dc_api
    """,
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
)

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2018 song9446

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# dcinside-python3-api
Dead-simple, unofficial async dcinside API for Python 3.

```python
# Crawl posts from the "programming" gallery indefinitely
import asyncio
import dc_api

async def run():
    async with dc_api.API() as api:
        async for index in api.board(board_id="programming"):
            print(index.title)  # => 땔감 벗어나는법.tip
            doc = await index.document()
            print(doc.contents)  # => 자바를 한다
            for img in doc.images:
                await img.download('./img')  # => ./img.gif
            async for com in index.comments():
                print(com.contents)  # => ㅇㅇ(1.224) 지랄 ㄴ

asyncio.run(run())
```

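The example above crawls until the gallery runs out. `api.board()` also accepts a `num` limit and paging/ID-range arguments (see Usage below), so a bounded crawl only needs `num`. A minimal sketch, assuming the "programming" gallery and an arbitrary output file name:

```python
# Fetch the 50 newest post indexes and dump them as JSON lines.
import asyncio
import json
import dc_api

async def dump_recent(board_id="programming", count=50, path="recent_posts.jsonl"):
    async with dc_api.API() as api:
        with open(path, "w", encoding="utf-8") as f:
            async for index in api.board(board_id=board_id, num=count):
                row = {
                    "id": index.id,
                    "title": index.title,
                    "author": index.author,
                    "time": index.time.isoformat(),
                    "views": index.view_count,
                    "comments": index.comment_count,
                }
                f.write(json.dumps(row, ensure_ascii=False) + "\n")

asyncio.run(dump_recent())
```
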
```python
import asyncio
import dc_api

async def run():
    api = dc_api.API()

    # Write a comment
    await api.write_comment(board_id="programming", document_id=149123, name="ㅇㅇ", password="1234", contents="ㅇㅈ")

    # Write a post
    doc_id = await api.write_document(board_id="programming", title="java vs python", contents="닥치고 자바", name="ㅇㅇ", password="1234")

    # Delete the post
    await api.remove_document(board_id="programming", document_id=doc_id, password="1234")

    # Write a post on a minor gallery
    doc_id = await api.write_document(board_id="aoegame", title="java vs python", contents="닥치고 자바", name="ㅇㅇ", password="1234", is_minor=True)

    await api.close()

asyncio.run(run())
```

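Every `Document` exposes its inline images as `Image` objects whose `download()` coroutine picks the file extension automatically. A minimal sketch of pulling all images from one post (the document id and output directory here are made up for illustration):

```python
# Download every image attached to a single post into ./downloads/.
import asyncio
import os
import dc_api

async def grab_images(board_id="programming", document_id=835027, out_dir="downloads"):
    os.makedirs(out_dir, exist_ok=True)
    async with dc_api.API() as api:
        doc = await api.document(board_id=board_id, document_id=document_id)
        if doc is None:  # the parser returns None for layouts it cannot handle
            return
        for n, image in enumerate(doc.images):
            # download() appends ".jpg", ".gif", ... based on the file signature
            await image.download(os.path.join(out_dir, f"{document_id}_{n}"))

asyncio.run(grab_images())
```
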
# Dependency
Python (>3.6) with aiohttp, lxml, tenacity, and filetype (see requirements.txt)

# Features
- [x] Board crawling
- [x] Fetch document body
- [x] Fetch comments
- [x] Fetch document images
- [x] Write/Modify/Delete document
- [x] Write comment
- [ ] Delete comment
- [ ] Login/Logout
- [ ] Upvote/Downvote

# Usage
Place dc_api.py in your working directory, or install it via pip:

```
pip3 install --user dc_api
```

```python
import dc_api

api = dc_api.API()

async for index in api.board(board_id="programming", num=-1, start_page=1, document_id_upper_limit=None, document_id_lower_limit=None):
    index.id # => 835027
    index.board_id # => "programming"
    index.title # => "땔감 벗어나는법.tip"
    index.author # => "ㅇㅇ(10.20)"
    index.time # => datetime("2020-01-01 01:41:00.000000")
    index.comment_count # => 3
    index.voteup_count # => 0
    index.view_count # => 14

    doc = await index.document()
    doc.id # => 835027
    doc.board_id # => "programming"
    doc.title # => "땔감 벗어나는법.tip"
    doc.author # => "ㅇㅇ(10.20)"
    doc.author_id # => None (returns the member ID when the author is a registered nickname)
    doc.time # => datetime("2020-01-01 01:41:00.000000")
    doc.comment_count # => 3
    doc.voteup_count # => 0
    doc.logined_voteup_count # => 0
    doc.votedown_count # => 0
    doc.view_count # => 14
    doc.contents # => "자바를 한다"
    doc.html # => '<div class="thum-txtin">자바를 한다 ...</div>'

    for image in doc.images:
        image.src # => "https://..."
        image.document_id # => 835027
        image.board_id # => "programming"
        await image.load() # => raw image binary
        await image.download(path) # => saves the image to a local path (the file extension is added automatically)

    async for com in index.comments():
        com.id # => 123123
        com.is_reply # => False
        com.time # => "1:55"
        com.author # => "ㅇㅇ(192.23)"
        com.author_id # => None (returns the member ID when the author is a registered nickname)
        com.contents # => "개솔 ㄴㄴ"
        com.dccon # => None (returns the dccon URL when the comment is a dccon)
        com.voice # => None (returns the voice-reply URL when the comment is a voice reply)


doc = await api.document(board_id="programming", document_id=835027)

async for comm in api.comments(board_id="programming", document_id=835027):
    comm


doc_id = await api.write_document(board_id="programming",
                                  name="점진적자살", password="1234",
                                  title="제목", contents="내용", is_minor=False)
doc_id = await api.modify_document(board_id="programming", document_id=doc_id,
                                   name="얄파고", pw="1234",
                                   title="수정된 제목", contents="수정된 내용", is_minor=False)
com_id = await api.write_comment(board_id="programming", document_id=doc_id,
                                 name="점진적자살", password="1234", contents="설리")
await api.remove_document(board_id="programming", document_id=doc_id, password="1234")

```
--------------------------------------------------------------------------------
/dc_api.py:
--------------------------------------------------------------------------------
import asyncio
import json
import re
import lxml.html
from datetime import datetime, timedelta
import itertools
import aiohttp
import filetype

DOCS_PER_PAGE = 200

GET_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Linux; Android 7.0; SM-G892A Build/NRD90M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/67.0.3396.87 Mobile Safari/537.36"
}
XML_HTTP_REQ_HEADERS = {
    "Accept": "*/*",
    "Connection": "keep-alive",
    "User-Agent": "Mozilla/5.0 (Linux; Android 7.0; SM-G892A Build/NRD90M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/67.0.3396.87 Mobile Safari/537.36",
    "X-Requested-With": "XMLHttpRequest",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en-US,en;q=0.5",
    "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
}

POST_HEADERS = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en-US,en;q=0.9,ko;q=0.8",
    "Cache-Control": "no-cache",
    "Connection": "keep-alive",
    "Pragma": "no-cache",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Linux; Android 7.0; SM-G892A Build/NRD90M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/67.0.3396.87 Mobile Safari/537.36",
}

GALLERY_POSTS_COOKIES = {
    "__gat_mobile_search": 1,
    "list_count": DOCS_PER_PAGE,
}

def unquote(encoded):
    # Decode \uXXXX / \uXX escapes produced by dcinside's JavaScript-style encoding.
    return re.sub(r'\\u([a-fA-F0-9]{4}|[a-fA-F0-9]{2})', lambda m: chr(int(m.group(1), 16)), encoded)

def quote(decoded):
    # Encode a string roughly the way JavaScript's legacy escape() does
    # (%XX for narrow characters, %uXXXX for wide ones); used for the
    # board-history cookie value.
    arr = []
    for c in decoded:
        t = hex(ord(c))[2:].upper()
        if len(t) >= 4:
            arr.append("%u" + t)
        else:
            arr.append("%" + t)
    return "".join(arr)

def peek(iterable):
    # Return (first_item, restored_iterable), or None if the iterable is empty.
    try:
        first = next(iterable)
    except StopIteration:
        return None
    return first, itertools.chain((first,), iterable)


class DocumentIndex:
    __slots__ = ["id", "subject", "title", "board_id", "has_image", "author", "time", "view_count", "comment_count", "voteup_count", "document", "comments", "image_available"]
    def __init__(self, id, board_id, title, has_image, author, time, view_count, comment_count, voteup_count, document, comments, subject, image_available):
        self.id = id
        self.board_id = board_id
        self.title = title
        self.has_image = has_image
        self.author = author
        self.time = time
        self.view_count = view_count
        self.comment_count = comment_count
        self.voteup_count = voteup_count
        self.document = document
        self.comments = comments
        self.subject = subject
        self.image_available = image_available
    def __str__(self):
        return f"{self.subject or ''}\t|{self.id}\t|{self.time.isoformat()}\t|{self.author}\t|{self.title}({self.comment_count}) +{self.voteup_count}"

class Document:
    __slots__ = ["id", "board_id", "title", "author", "author_id", "contents", "images", "html", "view_count", "voteup_count", "votedown_count", "logined_voteup_count", "time", "subject", "comments"]
    def __init__(self, id, board_id, title, author, author_id, contents, images, html, view_count, voteup_count, votedown_count, logined_voteup_count, time, comments, subject=None):
        self.id = id
        self.board_id = board_id
        self.title = title
        self.author = author
        self.author_id = author_id
        self.contents = contents
        self.images = images
        self.html = html
        self.view_count = view_count
        self.voteup_count = voteup_count
        self.votedown_count = votedown_count
        self.logined_voteup_count = logined_voteup_count
        self.comments = comments
        self.time = time
        self.subject = subject
    def __str__(self):
        # Document carries no comment count of its own, so it is omitted here.
        return f"{self.subject or ''}\t|{self.id}\t|{self.time.isoformat()}\t|{self.author}\t|{self.title} +{self.voteup_count} -{self.votedown_count}\n{self.contents}"

class Comment:
    __slots__ = ["id", "is_reply", "author", "author_id", "contents", "dccon", "voice", "time"]
    def __init__(self, id, is_reply, author, author_id, contents, dccon, voice, time):
        self.id = id
        self.is_reply = is_reply
        self.author = author
        self.author_id = author_id
        self.contents = contents
        self.dccon = dccon
        self.voice = voice
        self.time = time
    def __str__(self):
        return f"ㄴ{'ㄴ' if self.is_reply else ''} {self.author}: {self.contents or ''}{self.dccon or ''}{self.voice or ''} | {self.time}"

class Image:
    __slots__ = ["src", "document_id", "board_id", "session"]
    def __init__(self, src, document_id, board_id, session):
        self.src = src
        self.document_id = document_id
        self.board_id = board_id
        self.session = session
    async def load(self):
        headers = GET_HEADERS.copy()
        headers["Referer"] = "https://m.dcinside.com/board/{}/{}".format(self.board_id, self.document_id)
        async with self.session.get(self.src, cookies=GALLERY_POSTS_COOKIES, headers=headers) as res:
            return await res.read()
    async def download(self, path):
        headers = GET_HEADERS.copy()
        headers["Referer"] = "https://m.dcinside.com/board/{}/{}".format(self.board_id, self.document_id)
        async with self.session.get(self.src, cookies=GALLERY_POSTS_COOKIES, headers=headers) as res:
            bytes = await
res.read() 131 | ext = filetype.guess(bytes).extension 132 | with open(path + '.' + ext, 'wb') as f: 133 | f.write(bytes) 134 | 135 | 136 | 137 | class API: 138 | def __init__(self): 139 | self.session = aiohttp.ClientSession(headers=GET_HEADERS, cookies={"_ga": "GA1.2.693521455.1588839880"}) 140 | async def close(self): 141 | await self.session.close() 142 | async def __aenter__(self): 143 | return self 144 | async def __aexit__(self, *args, **kwargs): 145 | await self.close() 146 | async def watch(self, board_id): 147 | pass 148 | async def gallery(self, name=None): 149 | url = "https://m.dcinside.com/galltotal" 150 | gallerys={} 151 | async with self.session.get(url) as res: 152 | text = await res.text() 153 | parsed = lxml.html.fromstring(text) 154 | for i in parsed.xpath('//*[@id="total_1"]/li'): 155 | for e in i.iter(): 156 | if e.tag == "a": 157 | board_name = e.text 158 | board_id = e.get("href").split("/")[-1] 159 | if name: 160 | if name in board_name: 161 | gallerys[board_name] = board_id 162 | else: 163 | gallerys[board_name] = board_id 164 | return gallerys 165 | async def board(self, board_id, num=-1, start_page=1, recommend=False, document_id_upper_limit=None, document_id_lower_limit=None, is_minor=False): 166 | page = start_page 167 | while num: 168 | if recommend: 169 | url = "https://m.dcinside.com/board/{}?recommend=1&page={}".format(board_id, page) 170 | else: 171 | url = "https://m.dcinside.com/board/{}?page={}".format(board_id, page) 172 | async with self.session.get(url) as res: 173 | text = await res.text() 174 | parsed = lxml.html.fromstring(text) 175 | doc_headers = (i[0] for i in parsed.xpath("//ul[contains(@class, 'gall-detail-lst')]/li") if not i.get("class", "").startswith("ad")) 176 | for doc in doc_headers: 177 | document_id = doc[0].get("href").split("/")[-1].split("?")[0] 178 | if document_id_upper_limit and int(document_id_upper_limit) <= int(document_id): continue 179 | if document_id_lower_limit and int(document_id_lower_limit) >= int(document_id): return 180 | if len(doc[0][1]) == 5: 181 | subject = doc[0][1][0].text 182 | author = doc[0][1][1].text 183 | time= self.__parse_time(doc[0][1][2].text) 184 | view_count= int(doc[0][1][3].text.split()[-1]) 185 | voteup_count= int(doc[0][1][4][0].text.split()[-1]) 186 | else: 187 | subject = None 188 | author = doc[0][1][0].text 189 | time= self.__parse_time(doc[0][1][1].text) 190 | view_count= int(doc[0][1][2].text.split()[-1]) 191 | voteup_count= int(doc[0][1][3].text_content().split()[-1]) 192 | if "sp-lst-img" in doc[0][0][0].get("class"): 193 | image_available = True 194 | else: 195 | image_available = False 196 | title = doc[0][0][1].text 197 | indexdata = DocumentIndex( 198 | id= document_id, 199 | board_id=board_id, 200 | title= title, 201 | has_image= doc[0][0][0].get("class").endswith("img"), 202 | author= author, 203 | view_count= view_count, 204 | voteup_count= voteup_count, 205 | comment_count= int(doc[1][0].text), 206 | document= lambda: self.document(board_id, document_id), 207 | comments= lambda: self.comments(board_id, document_id), 208 | time= time, 209 | subject=subject, 210 | image_available=image_available 211 | ) 212 | yield(indexdata) 213 | num-=1 214 | if num==0: 215 | break 216 | if not doc_headers: 217 | break 218 | else: 219 | page+=1 220 | async def document(self, board_id, document_id): 221 | url = "https://m.dcinside.com/board/{}/{}".format(board_id, document_id) 222 | async with self.session.get(url) as res: 223 | text = await res.text() 224 | parsed = lxml.html.fromstring(text) 
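            # Mobile-page parsing happens below: the header box ('gallview-tit-box')
            # provides title, author, author_id and time, while the body div
            # ('thum-txtin') provides contents, html and images; ad containers
            # ('adv-groupin') and static ad images are stripped before the
            # Document object is built.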
225 | doc_content_container = parsed.xpath("//div[@class='thum-txtin']") 226 | doc_head_containers = parsed.xpath("//div[starts-with(@class, 'gallview-tit-box')]") 227 | if not len(doc_head_containers): 228 | return None 229 | doc_head_container = doc_head_containers[0] 230 | if len(doc_content_container): 231 | title = " ".join(doc_head_container[0].text.strip().split()) 232 | author = doc_head_container[1][0][0].text.strip() 233 | author_id = None if len(doc_head_container[1]) <= 1 else doc_head_container[1][1][0].get("href").split("/")[-1] 234 | time = doc_head_container[1][0][1].text.strip() 235 | doc_content = parsed.xpath("//div[@class='thum-txtin']")[0] 236 | for adv in doc_content.xpath("div[@class='adv-groupin']"): 237 | adv.getparent().remove(adv) 238 | for adv in doc_content.xpath("//img"): 239 | if adv.get("src", "").startswith("https://nstatic") and not adv.get("data-original"): 240 | adv.getparent().remove(adv) 241 | return Document( 242 | id = document_id, 243 | board_id = board_id, 244 | title= title, 245 | author= author, 246 | author_id =author_id, 247 | contents= '\n'.join(i.strip() for i in doc_content.itertext() if i.strip() and not i.strip().startswith("이미지 광고")), 248 | images= [Image( 249 | src=i.get("data-original", i.get("src")), 250 | board_id=board_id, 251 | document_id=document_id, 252 | session=self.session) 253 | for i in doc_content.xpath("//img") 254 | if i.get("data-original") or (not i.get("src","").startswith("https://nstatic") and 255 | not i.get("src", "").startswith("https://img.iacstatic.co.kr") and i.get("src"))], 256 | html= lxml.html.tostring(doc_content, encoding=str), 257 | view_count= int(parsed.xpath("//ul[@class='ginfo2']")[1][0].text.strip().split()[1]), 258 | voteup_count= int(parsed.xpath("//span[@id='recomm_btn']")[0].text.strip()), 259 | votedown_count= int(parsed.xpath("//span[@id='nonrecomm_btn']")[0].text.strip()), 260 | logined_voteup_count= int(parsed.xpath("//span[@id='recomm_btn_member']")[0].text.strip()), 261 | comments= lambda: self.comments(board_id, document_id), 262 | time= self.__parse_time(time) 263 | ) 264 | else: 265 | # fail due to unusual tags in mobile version 266 | # at now, just skip it 267 | return None 268 | ''' !TODO: use an alternative(PC) protocol to fetch document 269 | else: 270 | url = "https://gall.dcinside.com/{}?no={}".format(board_id, document_id) 271 | res = sess.get(url, timeout=TIMEOUT, headers=ALTERNATIVE_GET_HEADERS) 272 | parsed = lxml.html.fromstring(res.text) 273 | doc_content = parsed.xpath("//div[@class='thum-txtin']")[0] 274 | return '\n'.join(i.strip() for i in doc_content.itertext() if i.strip() and not i.strip().startswith("이미지 광고")), [i.get("src") for i in doc_content.xpath("//img") if not i.get("src","").startswith("https://nstatic")], comments(board_id, document_id, sess=sess) 275 | ''' 276 | async def comments(self, board_id, document_id, num=-1, start_page=1): 277 | url = "https://m.dcinside.com/ajax/response-comment" 278 | for page in range(start_page, 999999): 279 | payload = {"id": board_id, "no": document_id, "cpage": page, "managerskill":"", "del_scope": "1", "csort": ""} 280 | async with self.session.post(url, headers=XML_HTTP_REQ_HEADERS, data=payload) as res: 281 | parsed = lxml.html.fromstring(await res.text()) 282 | if not len(parsed[1].xpath("li")): break 283 | for li in parsed[1].xpath("li"): 284 | if not len(li[0]) or not li[0].text: continue 285 | yield Comment( 286 | id= li.get("no"), 287 | is_reply = "comment-add" in li.get("class", "").strip().split(), 288 | author = 
li[0].text + ("{}".format(li[0][0].text) if li[0][0].text else ""), 289 | author_id= li[0][1].get("data-info", None) if len(li[0]) > 1 else None, 290 | contents= '\n'.join(i.strip() for i in li[1].itertext()), 291 | dccon= li[1][0].get("data-original", li[1][0].get("src", None)) if len(li[1]) and li[1][0].tag=="img" else None, 292 | voice= li[1][0].get("src", None) if len(li[1]) and li[1][0].tag=="iframe" else None, 293 | time= self.__parse_time(li[2].text)) 294 | num -= 1 295 | if num == 0: 296 | return 297 | page_num_els = parsed.xpath("span[@class='pgnum']") 298 | if page_num_els: 299 | p = page_num_els[0].itertext() 300 | next(p) 301 | if page == next(p)[1:]: 302 | break 303 | else: 304 | break 305 | async def write_comment(self, board_id, document_id, contents="", dccon_id="", dccon_src="", parent_comment_id="", name="", password="", is_minor=False): 306 | url = "https://m.dcinside.com/board/{}/{}".format(board_id, document_id) 307 | async with self.session.get(url) as res: 308 | parsed = lxml.html.fromstring(await res.text()) 309 | hide_robot = parsed.xpath("//input[@class='hide-robot']")[0].get("name") 310 | csrf_token = parsed.xpath("//meta[@name='csrf-token']")[0].get("content") 311 | title = parsed.xpath("//span[@class='tit']")[0].text.strip() 312 | board_name = parsed.xpath("//a[@class='gall-tit-lnk']")[0].text.strip() 313 | con_key = await self.__access("com_submit", url, require_conkey=False, csrf_token=csrf_token) 314 | header = XML_HTTP_REQ_HEADERS.copy() 315 | header["Referer"] = url 316 | header["Host"] = "m.dcinside.com" 317 | header["Origin"] = "https://m.dcinside.com" 318 | header["X-CSRF-TOKEN"] = csrf_token 319 | cookies = { 320 | "m_dcinside_" + board_id: board_id, 321 | "m_dcinside_lately": quote(board_id + "|" + board_name + ","), 322 | "_ga": "GA1.2.693521455.1588839880", 323 | } 324 | url = "https://m.dcinside.com/ajax/comment-write" 325 | payload = { 326 | "comment_memo": contents, 327 | "comment_nick": name, 328 | "comment_pw": password, 329 | "mode": "com_write", 330 | "comment_no": parent_comment_id, 331 | "id": board_id, 332 | "no": document_id, 333 | "best_chk": "", 334 | "subject": title, 335 | "board_id": "0", 336 | "reple_id":"", 337 | "cpage": "1", 338 | "con_key": con_key, 339 | hide_robot: "1", 340 | } 341 | if dccon_id: payload["detail_idx"] = dccon_id 342 | if dccon_src: payload["comment_memo"] = "