├── tests ├── __init__.py ├── supporting_files │ ├── list-host.txt │ └── list-host-with-filename.txt ├── .gitignore ├── test_downloader.py └── test_lpse.py ├── requirements.txt ├── pyproc ├── __init__.py ├── exceptions.py ├── utils.py ├── text.py ├── lpse.py └── cli.py ├── .gitignore ├── LICENSE ├── pyproject.toml ├── .github └── workflows │ ├── test.yml │ └── pyproc-pypi.yml ├── CHANGELOG.md └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/supporting_files/list-host.txt: -------------------------------------------------------------------------------- 1 | sumbarprov 2 | bengkuluprov -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | backoff 2 | beautifulsoup4 3 | html5lib 4 | requests 5 | 6 | -------------------------------------------------------------------------------- /tests/supporting_files/list-host-with-filename.txt: -------------------------------------------------------------------------------- 1 | sumbarprov;sumbar.csv 2 | bengkuluprov;bengkulu.csv -------------------------------------------------------------------------------- /tests/.gitignore: -------------------------------------------------------------------------------- 1 | !supporting_files 2 | !__init__.py 3 | !test_lpse.py 4 | !test_downloader.pyg 5 | *.idx 6 | 7 | -------------------------------------------------------------------------------- /pyproc/__init__.py: -------------------------------------------------------------------------------- 1 | from .lpse import Lpse, JenisPengadaan 2 | 3 | __version__ = '0.2a' 4 | __author__ = 'Agung Pratama' 5 | __all__ = [ 6 | 'Lpse', 7 | 'JenisPengadaan', 8 | 'utils', 9 | 'exceptions' 10 | ] 
-------------------------------------------------------------------------------- /pyproc/exceptions.py: -------------------------------------------------------------------------------- 1 | class LpseVersionException(Exception): 2 | pass 3 | 4 | 5 | class LpseHostExceptions(Exception): 6 | pass 7 | 8 | 9 | class LpseServerExceptions(Exception): 10 | pass 11 | 12 | 13 | class LpseAuthTokenNotFound(Exception): 14 | pass 15 | 16 | 17 | class DownloaderContextException(Exception): 18 | pass 19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .idea 3 | venv 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | pip-wheel-metadata/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | *.csv 56 | *.idx 57 | statistic.txt -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Agung Pratama 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "pyproc" 7 | version = "0.2a" 8 | authors = [ 9 | { name="Agung Pratama", email="workwithagung@gmail.com"} 10 | ] 11 | description = "Python Inaproc SPSE SDK" 12 | readme = "README.md" 13 | requires-python = ">=3.9" 14 | classifiers = [ 15 | 'Development Status :: 4 - Beta', 16 | 'Intended Audience :: Developers', 17 | 'Topic :: Internet :: WWW/HTTP :: Dynamic Content :: CGI Tools/Libraries', 18 | 'Topic :: Software Development :: Libraries :: Python Modules', 19 | 'Topic :: Utilities', 20 | 'Natural Language :: English', 21 | 'Natural Language :: Indonesian', 22 | 'Operating System :: OS Independent', 23 | 'Programming Language :: Python :: 3.7', 24 | 'License :: OSI Approved :: MIT License' 25 | ] 26 | license = "MIT" 27 | license-files = ["LICENSE"] 28 | dependencies = [ 29 | "requests", 30 | "beautifulsoup4", 31 | "html5lib", 32 | "backoff" 33 | ] 34 | 35 | [project.urls] 36 | Homepage = "https://github.com/wakataw/pyproc" 37 | Issues = "https://github.com/wakataw/pyproc/issues" 38 | 39 | [project.scripts] 40 | pyproc = "pyproc.cli:main" 41 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: PyProc Test 5 | 6 | on: 7 | push: 8 | branches: 9 | - master 10 | 11 | jobs: 12 | build-and-test: 13 | runs-on: ubuntu-latest 14 | strategy: 15 | matrix: 16 | python-version: ['3.9', '3.10', '3.13'] 
17 | steps: 18 | - uses: actions/checkout@v4 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: actions/setup-python@v3 21 | with: 22 | python-version: ${{ matrix.python-version }} 23 | - name: Display Python Version 24 | run: python -c "import sys; print(sys.version)" 25 | - name: Clean build directory 26 | run: rm -rf ./dists ./pyproc.egg-info ./tests/*.csv ./tests/*.idx ./*csv ./*idx 27 | - name: Install dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | python -m pip install pytest 31 | python -m pip install -r requirements.txt 32 | - name: Test Package 33 | run: pytest 34 | - name: Build Package 35 | run: python -m build 36 | -------------------------------------------------------------------------------- /.github/workflows/pyproc-pypi.yml: -------------------------------------------------------------------------------- 1 | # This workflow builds the package distributions and publishes them to PyPI when a version tag (v*) is pushed 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: PyProc Build and Release 5 | 6 | on: 7 | push: 8 | tags: 9 | - v* 10 | 11 | jobs: 12 | release-build: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v4 16 | 17 | - name: Set up Python 18 | uses: actions/setup-python@v5 19 | with: 20 | python-version: '3.x' 21 | 22 | - name: Display Python Version 23 | run: python -c "import sys; print(sys.version)" 24 | 25 | - name: Clean build directory 26 | run: rm -rf ./dists ./pyproc.egg-info ./tests/*.csv ./tests/*.idx ./*csv ./*idx 27 | 28 | - name: Build Package 29 | run: | 30 | python -m pip install build 31 | python -m build 32 | 33 | - name: Upload distributions 34 | uses: actions/upload-artifact@v4 35 | with: 36 | name: release-dists 37 | path: dist/ 38 | 39 | pypi-publish: 40 | runs-on: ubuntu-latest 41 | needs: 42 | - release-build 43 | permissions: 44 | id-token: write 45 | 46 | environment: 47 | 
name: pypi 48 | 49 | steps: 50 | - name: Retrieve release distributions 51 | uses: actions/download-artifact@v4 52 | with: 53 | name: release-dists 54 | path: dist/ 55 | 56 | - name: Publish release distributions to PyPI 57 | uses: pypa/gh-action-pypi-publish@release/v1 58 | with: 59 | package-dir: dist/ 60 | 61 | 62 | -------------------------------------------------------------------------------- /pyproc/utils.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import json 3 | import re 4 | import requests 5 | from bs4 import BeautifulSoup 6 | from urllib.parse import urlparse 7 | 8 | TOKEN_FORMAT = re.compile(r"d\.authenticityToken[\s+]=[\s+]['\"]([0-9a-zA-Z]+)['\"];", re.DOTALL) 9 | 10 | 11 | def parse_token(page): 12 | token = TOKEN_FORMAT.findall(page) 13 | 14 | if token: 15 | return token[0] 16 | 17 | return 18 | 19 | 20 | def get_all_host(): 21 | resp = requests.get('https://satudata.inaproc.id/service/daftarLPSE', timeout=10) 22 | data = json.loads(resp.content) 23 | 24 | return data 25 | 26 | 27 | def download_host(logging, name='daftarlpse.csv'): 28 | data = get_all_host() 29 | hosts = dict() 30 | invalid_host = 0 31 | 32 | for item in data: 33 | try: 34 | url = item['repo_url4'] 35 | except KeyError: 36 | url = item['repo_url'] 37 | 38 | parsed_url = urlparse(url) 39 | 40 | if not parsed_url.scheme.startswith('http'): 41 | invalid_host += 1 42 | continue 43 | 44 | hosts[url] = str(item['repo_id']) + '-' + \ 45 | ' '.join([i for i in re.sub(r'[^a-zA-Z\d\s]', ' ', item['repo_nama']).split() if i.strip() != '']) 46 | 47 | logging.info( 48 | "{} alamat LPSE ditemukan. 
{} alamat valid, {} alamat tidak valid, {} alamat terduplikasi.".format( 49 | len(data), len(hosts), invalid_host, len(data) - len(hosts) - invalid_host 50 | ) 51 | ) 52 | logging.debug(hosts) 53 | 54 | with open(name, 'w', newline='', encoding='utf-8') as f: 55 | writer = csv.writer(f, delimiter=';') 56 | for k, v in hosts.items(): 57 | writer.writerow([k, v]) 58 | 59 | logging.info("Export daftar lpse ke {}".format(name)) 60 | 61 | 62 | def parse_version(version): 63 | version = tuple(map(int, re.findall(r'(?P\d+).(?P\d+)u(?P\d{8})', version)[0])) 64 | return version 65 | -------------------------------------------------------------------------------- /pyproc/text.py: -------------------------------------------------------------------------------- 1 | from pyproc import __version__ 2 | 3 | ########################## 4 | # downloader header logo # 5 | ########################## 6 | INFO = r''' ____ ____ 7 | / __ \__ __/ __ \_________ _____ 8 | / /_/ / / / / /_/ / ___/ __ \/ ___/ 9 | / ____/ /_/ / ____/ / / /_/ / /__ 10 | /_/ \__, /_/ /_/ \____/\___/ 11 | /____/ 12 | SPSE4 Downloader, PyProc v{} 13 | '''.format(__version__) 14 | 15 | ############################## 16 | # argument help text # 17 | ############################## 18 | HELP_KEYWORD = "filter pencarian index paket berdasarkan kata kunci tertentu" 19 | HELP_TAHUN_ANGGARAN = "filter download detail berdasarkan tahun anggaran. 
Format tahun anggaran bisa " \ 20 | "dilihat di dokumentasi" 21 | HELP_CHUNK_SIZE = "jumlah daftar index per-halaman yang diunduh dalam satu iterasi" 22 | HELP_WORKERS = "jumlah workers untuk mengunduh detil paket secara paralel" 23 | HELP_TIMEOUT = "besaran waktu timeout untuk menunggu respon dari server" 24 | HELP_NONTENDER = "flag untuk mengunduh data paket pengadaan langsung" 25 | HELP_INDEX_DOWNLOAD_DELAY = "waktu delay untuk setiap iterasi halaman index dalam detik" 26 | HELP_KEEP = "tidak menghapus working direktori dari downloader" 27 | HELP_LPSE_HOST = "host LPSE atau file teks berisi daftar host LPSE. Format dapat dilihat di dokumentasi" 28 | HELP_LOG_LEVEL = "Set log level" 29 | HELP_KATEGORI = "filter pencarian index paket berdasarkan kategori" 30 | HELP_PENYEDIA = "filter pencarian index paket berdasarkan nama penyedia" 31 | HELP_OUTPUT = "format output hasil download" 32 | HELP_RESUME = "melanjutkan proses sebelumnya" 33 | HELP_CSV_SEPARATOR = "set custom csv separator, default koma" 34 | 35 | ##################### 36 | # Error Information # 37 | ##################### 38 | 39 | ERROR_CTX_TAHUN_ANGGARAN = "Gagal parsing tahun anggaran, format yang diperbolehkan X-Y atau X;Y;Z" 40 | ERROR_CTX_RANGE_TAHUN = "Nilai tahun harus di antara 2000 dan {}" 41 | ERROR_CTX_HOST_SKEMA = "Skema URL tidak ditemukan. URL harus diawali http/https" 42 | ERROR_CTX_HOST_FORMAT = "Argumen host `{}` tidak sesuai format" 43 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | All notable changes to this project will be documented in this file. 3 | 4 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 5 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
6 | 7 | ## 0.1.7 8 | 9 | ### Fix 10 | - [downloader] fix error saat menggabungkan file detil jika pemenang tidak ditemukan 11 | - [downloader] ketika retry download, detil downloader akan menggunakan cache dari proses sebelumnya 12 | - [downloader] pemilihan pemenang berdasarkan hirarki pemenang berkontrak > pemenang terverifikasi > pemenang 13 | - [downloader] fix filter tahun anggaran, jika tidak ada data tahun anggaran maka filter akan berdasarkan data tanggal pembuatan 14 | - [package] Fix NPWP - Nama Peserta splitter pada hasil evaluasi 15 | - [package] menghilangkan fungsi pengecekan url rewrite dengan hardcoded path `/eproc4` pada host SPSE 16 | - [package] bypass parsing auth token jika versi SPSE < 20191009 17 | 18 | ### Add 19 | - [downloader] jika proses download index gagal di tengah jalan, aplikasi akan melanjutkan berdasarkan posisi batch terakhir 20 | - [downloader] menambahkan jenis paket pengadaan pada header informasi 21 | - [downloader] menambahkan argument `--skip-spse-check` 22 | - [package] menambahkan parameter `skip_spse_checking` untuk menghindari proses parsing info LPSE yang gagal jika aplikasi menggunakan custom homepage 23 | 24 | ### Change 25 | - update test case 26 | 27 | ## 0.1.6 28 | 29 | ### Change 30 | - Mengganti tipe data menjadi boolean untuk kolom pemenang dan pemenang berkontrak pada hasil evaluasi 31 | - Set default index download delay menjadi 1 detik 32 | - Set pemenang tender dari hasil evaluasi 33 | - Mengganti separator tahun anggaran (range) dari ',' (koma) menjadi '-' (dash) 34 | - update test case 35 | - minor update 36 | 37 | ## 0.1.5 38 | 39 | ### Add 40 | - menambahkan parameter `--index-download-delay` pada downloader CLI karena beberapa situs LPSE membatasi jumlah request 41 | 42 | ### Fix 43 | - fix `LpseDetil.get_pemenang` error karena menggunakan kolom hasil negosiasi sebagai parameter pengurutan 44 | 45 | ## 0.1.4 46 | 47 | ### Fix 48 | - Fix lpse pool error pada downloader karena belum implementasi 
authenticity token 49 | 50 | ## 0.1.3 51 | 52 | ### Fix 53 | - Fix download index paket error karena penambahan parameter pada API SPSE 54 | 55 | ### Add 56 | - Menambahkan method `Lpse.get_auth_token` untuk mendapatkan `auth_token` yang digunakan pada saat mendapatkan data index paket 57 | 58 | ## 0.1.2 59 | 60 | ### Fix 61 | - Fix downloader error karena perubahan api pada 0.1.1 62 | 63 | ## 0.1.1 64 | 65 | ### Fix 66 | - Fix pemenang double pada package dan downloader dengan memilih nilai penawaran paling rendah 67 | 68 | ## 0.1 69 | Release versi stable pertama 70 | 71 | ### Fitur 72 | 73 | - Dukungan Penuh untuk API SPSE Versi 4 74 | - Mendapatkan Daftar Paket Tender dan Non Tender 75 | - Mendapatkan Detil Paket (Pengumuman, Peserta, Hasil Evaluasi, Pemenang, Pemenang Berkontrak, Jadwal Penetapan Pemenang, Jadwal Penandatangan Kontrak) 76 | - Filter pencarian paket tender/non tender berdasarkan kategori pengadaan. 77 | - Mengurutkan Pencarian paket berdasarkan id paket, nama instansi, tahap paket, dan HPS 78 | - CLI Downloader 79 | -------------------------------------------------------------------------------- /tests/test_downloader.py: -------------------------------------------------------------------------------- 1 | import time 2 | import unittest 3 | from pyproc.cli import * 4 | 5 | 6 | class DownloaderTest(unittest.TestCase): 7 | LPSE_HOST_1 = 'kemenkeu' 8 | LPSE_HOST_2 = 'sumbarprov' 9 | LPSE_HOST_2_FILENAME = 'sumbarprov' 10 | LPSE_HOST_3 = 'bengkuluprov' 11 | LPSE_HOST_3_FILENAME = 'bengkuluprov' 12 | 13 | def test_context_parser(self): 14 | downloader = Downloader() 15 | ctx = downloader.get_ctx("--keyword WKWK --tahun-anggaran 2020 --workers 999 --chunk-size 1000 --timeout 99 " 16 | "--non-tender --index-download-delay 5 --keep-index " 17 | "--kategori PEKERJAAN_KONSTRUKSI --nama-penyedia HAHA --resume --sep | " 18 | f"{self.LPSE_HOST_2}".split(' ')) 19 | expected_condition = { 20 | '_DownloaderContext__lpse_host': self.LPSE_HOST_2, 21 | 
'chunk_size': 1000, 22 | 'keep_index': True, 23 | 'index_download_delay': 5, 24 | '_kategori': "PEKERJAAN_KONSTRUKSI", 25 | 'keyword': 'WKWK', 26 | 'nama_penyedia': "HAHA", 27 | 'non_tender': True, 28 | 'tahun_anggaran': [2020], 29 | 'timeout': 99, 30 | 'log_level': 'INFO', 31 | 'output_format': 'csv', 32 | 'resume': True, 33 | 'separator': '|', 34 | 'workers': 1 35 | } 36 | 37 | for key, v in ctx.__dict__.items(): 38 | self.assertEqual(v, expected_condition[key]) 39 | 40 | def test_tahun_anggaran_parser_single_tahun(self): 41 | downloader = Downloader() 42 | ctx = downloader.get_ctx(f"--tahun-anggaran 2015 {self.LPSE_HOST_2}".split(' ')) 43 | self.assertEqual([2015], ctx.tahun_anggaran) 44 | 45 | def test_tahun_anggaran_parser_multiple_tahun(self): 46 | downloader = Downloader() 47 | ctx = downloader.get_ctx(F"--tahun-anggaran 2015,2016,2020 {self.LPSE_HOST_2}".split(' ')) 48 | self.assertEqual([2015, 2016, 2020], ctx.tahun_anggaran) 49 | 50 | def test_tahun_anggaran_parser_range_tahun(self): 51 | downloader = Downloader() 52 | ctx = downloader.get_ctx(f"--tahun-anggaran 2015-2020 {self.LPSE_HOST_2}".split(' ')) 53 | self.assertEqual([i for i in range(2015,2021)], ctx.tahun_anggaran) 54 | 55 | def test_tahun_anggaran_parser_range_and_multiple_tahun(self): 56 | downloader = Downloader() 57 | ctx = downloader.get_ctx(f"--tahun-anggaran 2015-2020,2013,2012 {self.LPSE_HOST_2}".split(' ')) 58 | self.assertEqual([2012, 2013, 2015, 2016, 2017, 2018, 2019, 2020], ctx.tahun_anggaran) 59 | 60 | def test_tahun_anggaran_parser_invalid_format_1(self): 61 | downloader = Downloader() 62 | self.assertRaises(DownloaderContextException, downloader.get_ctx, 63 | f"--tahun-anggaran 2015;2020 {self.LPSE_HOST_2}".split(' ')) 64 | 65 | def test_tahun_anggaran_parser_invalid_value(self): 66 | downloader = Downloader() 67 | self.assertRaises(DownloaderContextException, downloader.get_ctx, 68 | f"--tahun-anggaran 1999-2030 {self.LPSE_HOST_2}".split(' ')) 69 | 70 | def 
test_lpse_host_parser(self): 71 | downloader = Downloader() 72 | ctx = downloader.get_ctx(f"--log=DEBUG {self.LPSE_HOST_2}".split(' ')) 73 | 74 | for i in ctx.lpse_host_list: 75 | self.assertTrue(i.is_valid) 76 | self.assertIsNone(i.error) 77 | self.assertEqual(self.LPSE_HOST_2, i.url) 78 | self.assertEqual(self.LPSE_HOST_2_FILENAME, i.filename.name) 79 | 80 | def test_lpse_host_multiple(self): 81 | downloader = Downloader() 82 | ctx = downloader.get_ctx(f"--log=DEBUG {self.LPSE_HOST_2},{self.LPSE_HOST_3}".split(' ')) 83 | urls = [self.LPSE_HOST_2, self.LPSE_HOST_3] 84 | filename = [self.LPSE_HOST_2_FILENAME, self.LPSE_HOST_3_FILENAME] 85 | 86 | for i in ctx.lpse_host_list: 87 | self.assertTrue(i.is_valid) 88 | self.assertIsNone(i.error) 89 | self.assertTrue(i.url in urls and i.filename.name in filename) 90 | 91 | def test_lpse_host_single_with_filename(self): 92 | downloader = Downloader() 93 | ctx = downloader.get_ctx(f"--log=DEBUG {self.LPSE_HOST_2};{self.LPSE_HOST_2_FILENAME}".split(' ')) 94 | 95 | for i in ctx.lpse_host_list: 96 | self.assertTrue(i.is_valid) 97 | self.assertIsNone(i.error) 98 | self.assertEqual(self.LPSE_HOST_2, i.url) 99 | self.assertEqual(self.LPSE_HOST_2_FILENAME, i.filename.name) 100 | 101 | def test_lpse_host_multiple_with_filename(self): 102 | downloader = Downloader() 103 | ctx = downloader.get_ctx(f"--log=DEBUG {self.LPSE_HOST_2};1.csv,{self.LPSE_HOST_3};2.csv".split(' ')) 104 | urls = [self.LPSE_HOST_2, self.LPSE_HOST_3] 105 | filename = ['1.csv', '2.csv'] 106 | 107 | for i in ctx.lpse_host_list: 108 | self.assertTrue(i.is_valid) 109 | self.assertIsNone(i.error) 110 | self.assertTrue(i.url in urls and i.filename.name in filename) 111 | 112 | def test_lpse_host_from_file(self): 113 | downloader = Downloader() 114 | file_path = Path(__file__).parent / 'supporting_files' / 'list-host.txt' 115 | ctx = downloader.get_ctx(["--log=DEBUG", file_path.as_posix()]) 116 | urls = ['sumbarprov', 'bengkuluprov'] 117 | filename = ['sumbarprov', 
'bengkuluprov'] 118 | 119 | for i in ctx.lpse_host_list: 120 | print(i) 121 | self.assertTrue(i.is_valid) 122 | self.assertIsNone(i.error) 123 | self.assertTrue(i.url in urls and i.filename.name in filename) 124 | 125 | def test_lpse_host_from_file_multiple_with_filename(self): 126 | downloader = Downloader() 127 | file_path = Path(__file__).parent / 'supporting_files' / 'list-host-with-filename.txt' 128 | ctx = downloader.get_ctx(["--log=DEBUG", file_path.as_posix()]) 129 | urls = ['sumbarprov', 'bengkuluprov'] 130 | filename = ['sumbar.csv', 'bengkulu.csv'] 131 | 132 | for i in ctx.lpse_host_list: 133 | self.assertTrue(i.is_valid) 134 | self.assertIsNone(i.error) 135 | self.assertTrue(i.url in urls and i.filename.name in filename) 136 | 137 | def test_kategori_not_in_choices(self): 138 | downloader = Downloader() 139 | self.assertRaises(SystemExit, downloader.get_ctx, f"--kategori HOHO {self.LPSE_HOST_2}".split()) 140 | 141 | def test_get_records_total(self): 142 | downloader = Downloader() 143 | downloader.get_ctx(f"--log=DEBUG --kategori PEKERJAAN_KONSTRUKSI {self.LPSE_HOST_2},{self.LPSE_HOST_3}".split()) 144 | 145 | for lpse_host in downloader.ctx.lpse_host_list: 146 | index_downloader = IndexDownloader(downloader.ctx, lpse_host) 147 | total = index_downloader.get_total_package(tahun=2020) 148 | self.assertTrue(type(total), int) 149 | 150 | def test_download_index(self): 151 | from pathlib import Path 152 | import sqlite3 153 | downloader = Downloader() 154 | downloader.get_ctx(f"{self.LPSE_HOST_1};test-download-index --tahun-anggaran 2027 --keep-index".split()) 155 | downloader.start() 156 | 157 | db_file = Path.cwd() / 'test-download-index.idx' 158 | self.assertTrue(db_file.is_file()) 159 | 160 | db = sqlite3.connect(str(db_file)) 161 | result = db.execute("SELECT COUNT(1) FROM INDEX_PAKET").fetchone()[0] 162 | self.assertTrue(result > 0) 163 | 164 | def test_index_db_row_factory(self): 165 | downloader = Downloader() 166 | downloader.get_ctx(f"--log=DEBUG 
{self.LPSE_HOST_1} --tahun-anggaran 2027".split()) 167 | 168 | for lpse_host in downloader.ctx.lpse_host_list: 169 | index_downloader = IndexDownloader(downloader.ctx, lpse_host) 170 | index_downloader.start() 171 | 172 | for index in index_downloader.get_index(): 173 | self.assertIsInstance(index, LpseIndex) 174 | 175 | def test_detail_downloader(self): 176 | downloader = Downloader() 177 | downloader.get_ctx(f"{self.LPSE_HOST_2}".split()) 178 | 179 | downloader.ctx.tahun = 2027 180 | 181 | for lpse_host in downloader.ctx.lpse_host_list: 182 | index_downloader = IndexDownloader(downloader.ctx, lpse_host) 183 | 184 | index_downloader.start() 185 | 186 | detail_downloader = DetailDownloader(index_downloader) 187 | detail_downloader.start() 188 | 189 | res = index_downloader.db.execute("SELECT COUNT(1) FROM main.INDEX_PAKET WHERE STATUS = 1").fetchone() 190 | 191 | self.assertTrue(res[0] > 0) 192 | 193 | def __init_db(self): 194 | downloader = Downloader() 195 | downloader.get_ctx(f"--tahun-anggaran 2027 {self.LPSE_HOST_1}".split()) 196 | 197 | logging.info("Start index download without detail") 198 | 199 | for lpse_host in downloader.ctx.lpse_host_list: 200 | index_downloader = IndexDownloader(downloader.ctx, lpse_host) 201 | index_downloader.start() 202 | 203 | total = index_downloader.db.execute("SELECT COUNT(1) FROM INDEX_PAKET WHERE DETAIL IS NOT NULL").fetchone()[0] 204 | self.assertEqual(total, 0) 205 | 206 | def test_resume_download(self): 207 | self.__init_db() 208 | downloader = Downloader() 209 | downloader.get_ctx(f"{self.LPSE_HOST_1} -r --tahun-anggaran 2027".split()) 210 | 211 | logging.info("Start index download with detail") 212 | 213 | downloader.start() 214 | 215 | for lpse_host in downloader.ctx.lpse_host_list: 216 | index_downloader = IndexDownloader(downloader.ctx, lpse_host) 217 | total = index_downloader.db.execute("SELECT COUNT(1) FROM INDEX_PAKET WHERE DETAIL IS NULL").fetchone()[0] 218 | self.assertEqual(total, 0) 219 | 220 | def 
test_resume_without_db(self): 221 | """ 222 | Test argument resume untuk lpse yang sebenarnya belum pernah didownload 223 | :return: 224 | """ 225 | downloader = Downloader() 226 | timestamp = int(time.time()) 227 | downloader.get_ctx(f"{self.LPSE_HOST_1};{timestamp} -r --tahun-anggaran 2027".split()) 228 | 229 | downloader.start() 230 | 231 | for lpse_host in downloader.ctx.lpse_host_list: 232 | index_downloader = IndexDownloader(downloader.ctx, lpse_host) 233 | total = index_downloader.db.execute("SELECT COUNT(1) FROM INDEX_PAKET WHERE DETAIL IS NULL").fetchone()[0] 234 | self.assertEqual(total, 0) 235 | 236 | def test_downloader_separator(self): 237 | downloader = Downloader() 238 | downloader.get_ctx('kp2mi;sep --tahun 2026 --sep |'.split()) 239 | downloader.start() 240 | 241 | with (Path.cwd() / 'sep.csv').open('r') as f: 242 | for row in csv.reader(f, delimiter="|"): 243 | print(len(row)) 244 | self.assertTrue(len(row) > 0) 245 | break 246 | 247 | def test_clear_working_dir(self): 248 | downloader = Downloader() 249 | downloader.get_ctx(f"{self.LPSE_HOST_1};index-deleted --tahun-anggaran 2027".split()) 250 | 251 | logging.info("Start index download with detail") 252 | 253 | downloader.start() 254 | 255 | index_path = Path.cwd() / 'index-deleted.idx' 256 | csv_path = Path.cwd() / 'index-deleted.csv' 257 | 258 | self.assertFalse(index_path.is_file()) 259 | self.assertTrue(csv_path.is_file()) 260 | 261 | def test_args_keep_index(self): 262 | downloader = Downloader() 263 | downloader.get_ctx(f"{self.LPSE_HOST_1};index-deleted --log DEBUG --tahun-anggaran 2027 --keep-index".split()) 264 | 265 | logging.info("Start index download with detail") 266 | 267 | downloader.start() 268 | 269 | index_path = Path.cwd() / 'index-deleted.idx' 270 | csv_path = Path.cwd() / 'index-deleted.csv' 271 | 272 | self.assertTrue(index_path.is_file()) 273 | self.assertTrue(csv_path.is_file()) 274 | 275 | def tearDown(self): 276 | csv = Path.cwd().glob('*.csv') 277 | idx = 
Path.cwd().glob('*.idx') 278 | txt = Path.cwd().glob('*.txt') 279 | 280 | for i in csv: 281 | i.unlink() 282 | 283 | for i in idx: 284 | try: 285 | i.unlink() 286 | except: 287 | continue 288 | 289 | for i in txt: 290 | i.unlink() 291 | 292 | 293 | if __name__ == '__main__': 294 | unittest.main() 295 | -------------------------------------------------------------------------------- /tests/test_lpse.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import pyproc.utils 3 | from pyproc import Lpse, JenisPengadaan 4 | from pyproc.exceptions import LpseHostExceptions 5 | from datetime import datetime 6 | from urllib3.exceptions import InsecureRequestWarning 7 | from urllib3 import disable_warnings 8 | 9 | disable_warnings(InsecureRequestWarning) 10 | 11 | 12 | class TestLpse(unittest.TestCase): 13 | id_tender_selesai = None 14 | 15 | def setUp(self): 16 | self.lpse = Lpse("kemenkeu", timeout=60) 17 | self.id_tender_selesai = self.get_id_for_testing() 18 | 19 | def get_id_for_testing(self, batch=0): 20 | paket = self.lpse.get_paket_tender(start=0+batch*50, length=50) 21 | 22 | for i in paket['data']: 23 | if i[3].lower().strip() == 'tender sudah selesai': 24 | return i[0] 25 | 26 | if self.id_tender_selesai is None: 27 | return self.get_id_for_testing(batch=batch+1) 28 | 29 | def test_get_auth_token(self): 30 | token = self.lpse.get_auth_token() 31 | token_from_session = self.lpse.session.cookies['SPSE_SESSION'].split('___')[1].split('=')[1].strip('&') 32 | self.assertEqual(token, token_from_session) 33 | 34 | def test_get_encoded_session_auth_token(self): 35 | lpse = Lpse("lampungprov") 36 | token = lpse.get_auth_token() 37 | self.assertTrue(len(token) > 10) 38 | 39 | def test_get_paket_tender_kosong(self): 40 | data = self.lpse.get_paket_tender() 41 | 42 | self.assertIsInstance(data, dict) 43 | 44 | def test_get_paket_tender_by_tahun(self): 45 | """ 46 | khusus lpse dengan versi >= 4.4 47 | :return: 48 | """ 49 
| current_year = datetime.now().year 50 | for tahun in range(current_year-3, current_year+1): 51 | lpse = Lpse("kemenkeu") 52 | data = lpse.get_paket_tender( 53 | length=25, 54 | tahun=tahun, 55 | data_only=True 56 | ) 57 | for i in data: 58 | self.assertTrue(str(tahun) in i[8]) 59 | 60 | def test_get_paket_tender_by_kategori(self): 61 | lpse = Lpse("kemenkeu", timeout=60) 62 | data = lpse.get_paket_tender( 63 | length=5, 64 | tahun=2021, 65 | data_only=True, 66 | kategori=JenisPengadaan.PENGADAAN_BARANG 67 | ) 68 | for i in data: 69 | self.assertTrue('pengadaan barang' in i[8].lower()) 70 | 71 | def test_get_paket_tender_by_instansi(self): 72 | lpse = Lpse("kemenkeu") 73 | data = lpse.get_paket_tender( 74 | length=5, 75 | data_only=True, 76 | instansi_id='L47' # KEPOLISIAN 77 | ) 78 | for i in data: 79 | self.assertTrue('kepolisian negara republik indonesia' in i[2].lower()) 80 | 81 | def test_get_paket_tender_isi(self): 82 | data = self.lpse.get_paket_tender(length=2) 83 | 84 | self.assertEqual(2, len(data['data'])) 85 | 86 | def test_get_paket_tender_pagination(self): 87 | data_1 = self.lpse.get_paket_tender(length=5) 88 | data_2 = self.lpse.get_paket_tender(start=4, length=5) 89 | 90 | self.assertEqual(data_1['data'][-1], data_2['data'][0]) 91 | 92 | def test_get_paket_tender_search(self): 93 | keyword = 'sekolah' 94 | data = self.lpse.get_paket_tender(length=1, search_keyword=keyword) 95 | 96 | for i in data['data']: 97 | self.assertEqual(True, keyword.lower() in i[1].lower()) 98 | 99 | def test_get_detil_tender(self): 100 | data = self.lpse.get_paket_tender(length=1) 101 | id_paket = data['data'][0][0] 102 | detil = self.lpse.detil_paket_tender(id_paket) 103 | 104 | detil.get_pengumuman() 105 | 106 | self.assertEqual(id_paket, detil.pengumuman['kode_tender']) 107 | 108 | def test_get_peserta_tender(self): 109 | data = self.lpse.get_paket_tender(length=1) 110 | id_paket = data['data'][0][0] 111 | detil = self.lpse.detil_paket_tender(id_paket) 112 | 113 | 
detil.get_peserta() 114 | 115 | self.assertIsInstance(detil.peserta, list) 116 | 117 | def test_get_hasil_evaluasi_tender(self): 118 | detil = self.lpse.detil_paket_tender(10080116000) 119 | 120 | detil.get_hasil_evaluasi() 121 | 122 | self.assertIsInstance(detil.hasil, list) 123 | 124 | def test_get_pemenang_tender(self): 125 | detil = self.lpse.detil_paket_tender(10080116000) 126 | detil.get_pemenang() 127 | for i, v in detil.pemenang[0].items(): 128 | self.assertIsNotNone(v) 129 | 130 | def test_get_pemenang_tender_kosong(self): 131 | # data = self.lpse.get_paket_tender(length=1) 132 | # id_paket = data['data'][0][0] 133 | # detil = self.lpse.detil_paket_tender(id_paket) 134 | # pemenang = detil.get_pemenang() 135 | # print(pemenang) 136 | # 137 | # self.assertEqual(pemenang, None) 138 | print("Data terlalu dinamis untuk di test. Uncomment fungsi ini lalu masukan ID tender secara manual untuk di test") 139 | pass 140 | 141 | def test_get_pemenang_berkontrak_tender(self): 142 | detil = self.lpse.detil_paket_tender(self.id_tender_selesai) 143 | detil.get_pemenang_berkontrak() 144 | 145 | if not detil.pemenang_berkontrak: 146 | print("Belum ada pemenang berkontrak") 147 | return 148 | 149 | for i, v in detil.pemenang_berkontrak[0].items(): 150 | self.assertIsNotNone(v) 151 | 152 | def test_get_jadwal_tender(self): 153 | data = self.lpse.get_paket_tender(length=1) 154 | detil = self.lpse.detil_paket_tender(data['data'][0][0]) 155 | detil.get_jadwal() 156 | jadwal_key = ['no', 'tahap', 'mulai', 'sampai', 'perubahan'] 157 | 158 | self.assertIsInstance(detil.jadwal, list) 159 | for key in detil.jadwal[0]: 160 | self.assertEqual(True, key in jadwal_key) 161 | 162 | def test_detil_todict(self): 163 | detil = self.lpse.detil_paket_tender(self.id_tender_selesai) 164 | detil.get_all_detil() 165 | 166 | self.assertIsInstance(detil.todict(), dict) 167 | 168 | def test_detil_todict_todict(self): 169 | detil = self.lpse.detil_paket_tender(self.id_tender_selesai) 170 | 
def get_id_for_testing(self):
    """Return the id of a finished non-tender package to run the tests on.

    Fetches the first 50 non-tender rows and returns column 0 of the first
    row whose column 3, lower-cased and stripped, equals
    ``'paket sudah selesai'``.

    When no such row exists the test is skipped explicitly. Previously the
    method fell through and returned ``None``, so every dependent test ran
    against ``id_paket=None`` and failed with an unrelated error.

    :return: package id taken from the matching row
    """
    paket = self.lpse.get_paket_non_tender(start=0, length=50)

    for row in paket['data']:
        if row[3].lower().strip() == 'paket sudah selesai':
            return row[0]

    # unittest's skipTest() raises SkipTest, so nothing is returned here.
    self.skipTest("Tidak ada paket non tender berstatus 'Paket Sudah Selesai' pada 50 data pertama")
def test_detil_id_random(self):
    """A non-tender detail object that fetched nothing exposes only its id.

    No ``get_*`` method is called, so every field of ``todict()`` except
    ``id_paket`` must still be ``None``. This suite covers non-tender
    packages, so the object is built with ``detil_paket_non_tender``; the
    previous version called ``detil_paket_tender`` by copy-paste.
    """
    detil = self.lpse.detil_paket_non_tender(111).todict()
    for key, value in detil.items():
        if key == 'id_paket':
            continue
        self.assertIsNone(value)
def test_pemenang_hasil_evaluasi(self):
    """Check the winning participant in the evaluation result of 3346331.

    The first evaluation row whose ``pemenang`` value is truthy must carry
    the expected participant name and masked NPWP.
    """
    detil = self.lpse.detil_paket_tender(3346331)
    detil.get_hasil_evaluasi()

    # Indexing [0] keeps the IndexError when no row is flagged as winner.
    winners = [row for row in detil.hasil if row['pemenang']]
    first_winner = winners[0]

    self.assertEqual(first_winner['nama_peserta'], 'CV. NIBUNG PUTIH')
    self.assertEqual(first_winner['npwp'], '0*.0**.1**.*-*34.**0')
class UtilsTest(unittest.TestCase):
    """Checks for ``pyproc.utils.parse_version``."""

    def test_compare_version(self):
        """The parsed older version must compare lower than the newer one."""
        older = pyproc.utils.parse_version('v4.4u20220509')
        newer = pyproc.utils.parse_version('v4.5u20220520')

        # assertLess prints both parsed values when the comparison fails.
        self.assertLess(older, newer)

    def test_parse_version(self):
        """A version string parses into a (major, minor, build) tuple."""
        self.assertEqual(
            pyproc.utils.parse_version('v4.5u20220520'),
            (4, 5, 20220520)
        )
Sistem Pengadaan Secara Elektronik (SPSE) SPSE merupakan aplikasi e-Procurement yang dikembangkan oleh LKPP untuk digunakan oleh LPSE di instansi pemerintah seluruh Indonesia. 6 | 7 | > DISCLAIMER: 8 | > 9 | > Penulis tidak terafiliasi dengan pengembang SPSE atau pemilik aplikasi SPSE. Software ini dikembangkan dengan tujuan akademis, bentuk pengawasan oleh masyarakat, dan membantu pengusaha untuk mempermudah otomasi perolehan informasi pengadaan dari pemerintah. 10 | > 11 | > Penggunaan yang tidak wajar dan mengganggu sebagian atau seluruh fungsi aplikasi SPSE pada satuan kerja menjadi tanggung jawab masing-masing pengguna. 12 | > 13 | > PyProc ada karena SPSE ada, jadi gunakanlah dengan bijak dan secukupnya. 14 | 15 | ## Pemasangan 16 | 17 | Pemasangan PyProc via `pip`: 18 | ```bash 19 | $ pip install pyproc 20 | ``` 21 | 22 | Upgrade PyProc via `pip`: 23 | ```bash 24 | $ pip install pyproc --upgrade 25 | ``` 26 | 27 | Instalasi versi unstable: 28 | ```bash 29 | $ pip install git+https://github.com/wakataw/pyproc.git 30 | ``` 31 | 32 | ## Penggunaan Command Line Interface 33 | 34 | ### Download Data LPSE 35 | Format Command 36 | ```bash 37 | $ pyproc [ARGUMENT] DAFTAR_LPSE 38 | ``` 39 | **Arguments** 40 | 41 | | argumen | contoh | diperlukan | default | keterangan | 42 | |-----------------------------|---------------------------------------------|------------|----------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| 43 | | `DAFTAR_LPSE` | `pyproc pu` | Ya | - | Daftar alamat LPSE yang akan diunduh.
[Format Daftar LPSE](#format-daftar-lpse-lanjutan) | 44 | | `-h --help` | `pyproc --help` | optional | - | menampilkan keterangan dan bantuan | 45 | | `-k --keyword` | `pyproc --keyword "mobil dinas" ...` | optional | - | filter pencarian index paket berdasarkan kata kunci tertentu | 46 | | `-t --tahun-anggaran` | `pyproc --tahun-anggaran 2021 ...` | optional | Tahun Berjalan | Filter pencarian index paket berdasarkan tahun anggaran tertentu. Fungsi ini hanya berlaku mulai dari SPSE 4.4.

Format Penulisan:
**ALL**: mengunduh seluruh data
**2021**: mengunduh data untuk tahun 2021
**2015,2018,2019**: mengunduh data untuk tahun 2015, 2018, dan 2019
**2011-2020** mengunduh data untuk tahun 2011 s.d. 2020 | 47 | | `--kategori` | `pyproc --kategori PENGADAAN_BARANG ...` | optional | - | Filter pencarian berdasarkan kategori pengadaan.
Daftar kategori: `PENGADAAN_BARANG`, `JASA_KONSULTANSI_BADAN_USAHA_NON_KONSTRUKSI`, `PEKERJAAN_KONSTRUKSI`, `JASA_LAINNYA`, `JASA_KONSULTANSI_PERORANGAN`, `JASA_KONSULTANSI_BADAN_USAHA_KONSTRUKSI` | 48 | | `--nama-penyedia` | `pyproc --nama-penyedia "PT SUKA MAJU" ...` | optional | - | Filter pencarian index paket berdasarkan nama penyedia | 49 | | `-c --chunk-size` | `pyproc --chunk-size 25 ...` | optional | 25 | Jumlah daftar paket per halaman yang diunduh. Semakin besar jumlah tidak menjamin proses download semakin cepat. Gunakanlah jumlah data yang wajar sehingga tidak membebani server SPSE. | 50 | | `-w --workers` | `pyproc --workers 4 ...` | optional | 8 | Jumlah koneksi yang berjalan secara bersamaan saat mengunduh detil paket dengan maksimal 10 worker. | 51 | | `-x --timeout` | `pyproc --timeout 60 ...` | optional | 30 | Waktu tunggu jika koneksi lambat (dalam detik) | 52 | | `-n --non-tender` | `pyproc --non-tender ...` | optional | FALSE | Tambahkan argumen ini untuk mengunduh data non-tender/pengadaan langsung | 53 | | `-d --index-download-delay` | `pyproc --index-download-delay 5 ...` | optional | 1 | Waktu jeda download index paket untuk setiap halaman/batch | 54 | | `-o --output` | `pyproc --output csv ...` | optional | csv | Jenis data keluaran/hasil dari download. Format yang didukung csv dan json. Karena keterbatasan format, tidak semua data ditampilkan pada format csv. Jika memerlukan data detil yang komprehensif, gunakan format json karena mencakup semua data detail. | 55 | | `--keep-index` | `pyproc --keep-index ...` | optional | FALSE | pyproc akan membentuk file idx (sqlite3 database) saat proses download dan akan dihapus ketika proses selesai. Tambahkan argumen ini jika tidak ingin menghapus database tersebut. | 56 | | `-r --resume` | `pyproc --resume ...` | optional | FALSE | Tambahkan argument ini untuk melanjutkan proses yang gagal (karena internet putus atau gangguan koneksi lainnya).
Namun pastikan bahwa seluruh index sudah berhasil diunduh karena argumen --resume akan melewati proses download index. | 57 | | `-s --sep` | `pyproc --sep ";" kemenkeu` | optional | `;` titik koma | Set custom separator untuk output format csv | 58 | | `--log` | `pyproc --log INFO ...` | optional | INFO | Argumen untuk setting informasi yang ditampilkan pyproc pada terminal. Daftar nilai yang didukung:
`DEBUG`: menampilkan informasi sedetil mungkin
`INFO`: menampilkan informasi penting saja
`WARNING`: hanya menampilkan informasi yang bersifat warning
`ERROR`: hanya menampilkan error
`CRITICAL`: hanya menampilkan permasalahan yang bersifat kritis saja | 59 | 60 | ### Format Daftar LPSE (lanjutan) 61 | PyProc dapat mengunduh data dari 1 atau lebih LPSE. Proses tersebut akan berjalan sesuai dengan nilai `DAFTAR_LPSE` yang diberikan user. Beberapa format yang didukung oleh PyProc adalah sebagai berikut: 62 | - Download data dengan menyertakan nama file hasil download 63 | 64 | Untuk set nama file secara manual, gunakan format `"alamatlpse[titik_koma]namafile"`. 65 | 66 | ```bash 67 | $ pyproc "kemenkeu;namafileouputkemenkeu" --output json 68 | ``` 69 | 70 | perintah ini akan mengunduh data LPSE Kemenkeu dan mengekspor data ke file `namafileouputkemenkeu.json` 71 | 72 | - Download data lebih dari 1 LPSE 73 | 74 | Untuk mengunduh lebih dari 1 lpse secara bersamaan, gunakan format `"alamat1[koma]alamat2[koma]alamat3"` 75 | 76 | ```bash 77 | $ pyproc jakarta,pu,kemenkeu,sumbarprov 78 | ``` 79 | 80 | atau dengan menyertakan namafile dengan format `"alamat1[titikkoma]nama1[koma]alamat2[titikkoma]nama2"` 81 | 82 | ```bash 83 | $ pyproc "jakarta;filejakarta,pu;filepu,kemenkeu;filekemenkeu,sumbarprov;filesumbarprov" 84 | ``` 85 | 86 | - Download data berdasarkan daftar lpse pada file csv 87 | Download paket LPSE dengan sumber alamat dari file 88 | ```bash 89 | $ pyproc daftarlpse.csv 90 | 91 | # konten daftarlpse.csv 92 | sumbarprov 93 | pu 94 | kemenkeu 95 | 96 | # konten daftarlpse.csv dengan nama hasil download 97 | sumbarprov;lpse-sumbar 98 | pu;lpse-pu.csv 99 | kemenkeu;lpse-kemenkeu 100 | ``` 101 | 102 | ## Penggunaan PyProc Sebagai Package 103 | 104 | Untuk dapat menggunakan PyProc, anda harus mengimpornya terlebih dahulu dan menginisiasi objek `Lpse` 105 | 106 | ```python 107 | from pyproc import Lpse 108 | 109 | # Inisiasi objek lpse kementerian keuangan 110 | lpse = Lpse('kemenkeu') 111 | ``` 112 | 113 | ### Pencarian Daftar Paket Lelang 114 | 115 | ```python 116 | from pyproc import Lpse 117 | 118 | # Inisiasi objek lpse kementerian pu 119 | lpse =
Lpse('pu') 120 | 121 | # mendapatkan daftar paket lelang 122 | daftar_lelang = lpse.get_paket_tender(start=0, length=2) 123 | print(daftar_lelang) 124 | 125 | # pencarian paket non tender (penunjukkan langsung) 126 | daftar_pl = lpse.get_paket_non_tender(start=0, length=30) 127 | ``` 128 | 129 | Pencarian Paket dengan mengurutkan berdasarkan kolom tertentu 130 | ```python 131 | from pyproc import Lpse 132 | from pyproc.lpse import By 133 | 134 | lpse = Lpse('sumbarprov') 135 | 136 | # pencarian daftar lelang, urutkan berdasarkan Harga Perkiraan Sendiri 137 | daftar_lelang = lpse.get_paket_tender(start=0, length=30, order=By.HPS) 138 | ``` 139 | 140 | Filter pencarian paket berdasarkan kategori pengadaan 141 | ```python 142 | from pyproc import Lpse 143 | from pyproc import JenisPengadaan 144 | 145 | # Inisiasi objek lpse kementerian padang 146 | lpse = Lpse('padang') 147 | 148 | # Kategori Pengadaan Barang 149 | paket_pengadaan_barang = lpse.get_paket_tender(start=0, length=30, kategori=JenisPengadaan.PENGADAAN_BARANG) 150 | paket_konstruksi = lpse.get_paket_tender(start=0, length=30, kategori=JenisPengadaan.PEKERJAAN_KONSTRUKSI) 151 | 152 | # dst untuk kategori lainnya 153 | ``` 154 | 155 | ### Pencarian Detil Paket Lelang 156 | 157 | ```python 158 | from pyproc import Lpse 159 | 160 | lpse = Lpse('jakarta') 161 | 162 | # mendapatkan semua detil paket lelang 163 | detil = lpse.detil_paket_tender(id_paket='48658064') 164 | detil.get_all_detil() 165 | print(detil) 166 | 167 | # mendapatkan hanya pemenang lelang 168 | pemenang = detil.get_pemenang() 169 | print(pemenang) 170 | ``` 171 | 172 | ## Uninstall 173 | 174 | Untuk uninstall package jalankan perintah berikut: 175 | ```bash 176 | $ pip uninstall pyproc 177 | ``` 178 | 179 | ## License 180 | Paket ini di-release di bawah lisensi MIT. 
def __init__(self, instansi, timeout=10):
    """Create a client bound to one LPSE instance.

    :param instansi: LPSE identifier appended to ``https://spse.inaproc.id/``
    :param timeout: request timeout stored on the instance (default 10)
    """
    user_agent = (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/102.0.5005.61 Safari/537.36'
    )

    session = requests.session()
    # NOTE(review): TLS certificate verification is switched off for every
    # request made through this session.
    session.verify = False
    # The session's header mapping is replaced, not merged, so only the
    # user-agent below is set as a session-level header.
    session.headers = {'user-agent': user_agent}

    self.session = session
    self.timeout = timeout
    # Filled lazily by get_paket() on first use.
    self.auth_token = None
    self.url = f"https://spse.inaproc.id/{instansi}"
def get_auth_token(self, from_cookies=True):
    """Fetch the authenticity token sent with the package search request.

    Requests the ``/lelang`` page. When ``from_cookies`` is true, the
    token is looked up in the ``SPSE_SESSION`` cookie as
    ``___AT=<token>&``. If the cookie is missing or holds no token, or
    ``from_cookies`` is false, the token is parsed from the page body with
    ``utils.parse_token``.

    :param from_cookies: try the session cookie before the page body
    :return: token (str)
    """
    # Use the instance timeout, as the other requests in this class do,
    # so an unresponsive server cannot block this call indefinitely.
    r = self.session.get(self.url + '/lelang', timeout=self.timeout)

    if from_cookies:
        # cookies.get() returns None when the cookie is absent. Feeding
        # None to the regex raised TypeError, so the HTML fallback below
        # was never reached.
        cookie = self.session.cookies.get('SPSE_SESSION') or ''
        match = re.search(r'___AT=([A-Za-z0-9]+)&', cookie)

        if match:
            return match.group(1)

    return utils.parse_token(r.text)
@backoff.on_exception(backoff.fibo,
                      (LpseServerExceptions, requests.exceptions.RequestException,
                       requests.exceptions.ConnectionError),
                      jitter=None, max_tries=3)
def get_paket(self, jenis_paket, start=0, length=0, data_only=False,
              kategori=None, search_keyword=None, nama_penyedia=None,
              order=By.KODE, tahun=None, ascending=False, instansi_id=None):
    """
    Search procurement packages through the ``/dt/<jenis_paket>`` endpoint.

    The call is retried (3 tries, Fibonacci backoff) on server or request errors.

    :param jenis_paket: tender (``lelang``) or direct appointment (``pl``)
    :param start: index of the first row
    :param length: number of rows to return
    :param data_only: return only the ``data`` list of the response
    :param kategori: procurement category (see lpse.JenisPengadaan)
    :param search_keyword: keyword used to search packages
    :param nama_penyedia: filter by provider name
    :param order: column used for ordering (a ``By`` member)
    :param tahun: procurement year
    :param ascending: ascending order; descending when False
    :param instansi_id: filter by a specific agency or work unit
    :return: dict with the search result (or list when data_only=True)
    """

    # TODO: data headers differ between the SPSE installations of each
    #       agency; check each LPSE to determine the data header.

    if not self.auth_token:
        self.auth_token = self.get_auth_token()

    params = {
        'draw': 1,
        'start': start,
        'length': length,
        'tahun': tahun,
        'search[value]': search_keyword or '',
        'search[regex]': 'false',
        'order[0][column]': order.value,
        'order[0][dir]': 'asc' if ascending else 'desc',
        'authenticityToken': self.auth_token,
        '_': int(time.time()*1000)
    }

    # Per-column descriptors; column 3 is the only one flagged as neither
    # searchable nor orderable.
    for col in range(5):
        flag = 'false' if col == 3 else 'true'
        params['columns[{}][data]'.format(col)] = col
        params['columns[{}][name]'.format(col)] = ''
        params['columns[{}][searchable]'.format(col)] = flag
        params['columns[{}][orderable]'.format(col)] = flag
        params['columns[{}][search][value]'.format(col)] = ''
        params['columns[{}][search][regex]'.format(col)] = 'false'

    if kategori:
        params['kategoriId'] = kategori.value

    if nama_penyedia:
        # The provider name is sent under both keys.
        params['rekanan'] = nama_penyedia
        params['rkn_nama'] = nama_penyedia

    if instansi_id:
        params['instansiId'] = instansi_id

    # prepare the GET and POST request for spse 4.5.20221227
    url = self.url + '/dt/' + jenis_paket
    headers = {
        'X-Requested-With': 'XMLHttpRequest',
        'Referer': self.url + '/lelang',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/77.0.3865.90 Safari/537.36'
    }

    response = self.session.post(
        url,
        data=params,
        verify=False,
        timeout=self.timeout,
        headers=headers
    )

    logging.debug(response.content)
    # Raises LpseServerExceptions when the response is an SPSE error page.
    self.check_error(response)

    response.encoding = 'UTF-8'
    payload = response.json()

    return payload['data'] if data_only else payload
def get_paket_tender(self, start=0, length=0, data_only=False,
                     kategori=None, search_keyword=None, nama_penyedia=None,
                     order=By.KODE, tahun=None, ascending=False, instansi_id=None):
    """
    Search tender packages; forwards every argument to ``get_paket('lelang', ...)``.

    :param start: index of the first row
    :param length: number of rows to return
    :param data_only: return only the ``data`` list of the response
    :param kategori: procurement category (a ``JenisPengadaan`` member)
    :param search_keyword: keyword used to search packages
    :param nama_penyedia: filter by provider name
    :param order: column used for ordering (a ``By`` member)
    :param tahun: procurement year
    :param ascending: ascending order; descending when False
    :param instansi_id: filter by a specific agency or work unit
    :return: dict with the search result (or list when data_only=True)
    """
    return self.get_paket(
        'lelang',
        start=start,
        length=length,
        data_only=data_only,
        kategori=kategori,
        search_keyword=search_keyword,
        nama_penyedia=nama_penyedia,
        order=order,
        tahun=tahun,
        ascending=ascending,
        instansi_id=instansi_id,
    )
class BaseLpseDetil(object):
    """Container for the detail sections of one package.

    Subclasses provide the ``get_*`` methods; this base class stores the
    results and offers bulk fetching and dict conversion.
    """

    def __init__(self, lpse, id_paket):
        """
        :param lpse: client object kept on the instance and passed to parsers by subclasses
        :param id_paket: package id
        """
        self._lpse = lpse
        self.id_paket = id_paket
        # Every section starts empty until its get_* method is called.
        for section in ('pengumuman', 'peserta', 'hasil', 'pemenang',
                        'pemenang_berkontrak', 'jadwal'):
            setattr(self, section, None)

    def get_all_detil(self):
        """Call every ``get_*`` method, collecting failures instead of raising.

        :return: dict with ``error`` (bool) and ``error_message`` (list of
                 ``"<exception> - <id_paket> - <method name>"`` strings)
        """
        steps = ('get_pengumuman', 'get_peserta', 'get_hasil_evaluasi', 'get_pemenang',
                 'get_pemenang_berkontrak', 'get_jadwal')
        messages = []

        for step in steps:
            try:
                getattr(self, step)()
            except Exception as exc:
                # Best effort: one failing section must not stop the others.
                messages.append('{} - {} - {}'.format(exc, self.id_paket, step))

        return {
            'error': bool(messages),
            'error_message': messages
        }

    def __str__(self):
        """Return the string form of ``todict()``."""
        return str(self.todict())

    def todict(self):
        """Return a shallow copy of the instance attributes without ``_lpse``."""
        data = dict(self.__dict__)
        del data['_lpse']
        return data
@backoff.on_exception(
    backoff.fibo,
    (LpseServerExceptions, requests.exceptions.RequestException,
     requests.exceptions.ConnectionError),
    max_tries=3,
    jitter=None,
)
def get_pemenang(self, all=False, key='hasil_negosiasi'):
    """Fetch the tender winner data and store it on ``self.pemenang``.

    Delegates to ``LpseDetilPemenangParser(...).get_detil()``; the call is
    retried (3 tries, Fibonacci backoff) on server or request errors.

    :param all: forwarded unchanged to the parser
    :param key: forwarded unchanged to the parser (default ``'hasil_negosiasi'``)
    :return: the parsed value, also kept in ``self.pemenang``
    """
    parser = LpseDetilPemenangParser(self._lpse, self.id_paket, all=all, key=key)
    self.pemenang = parser.get_detil()

    return self.pemenang
@backoff.on_exception(
    backoff.fibo,
    (LpseServerExceptions, requests.exceptions.RequestException,
     requests.exceptions.ConnectionError),
    max_tries=3,
    jitter=None,
)
def get_pemenang(self):
    """Fetch the non-tender winner data and store it on ``self.pemenang``.

    Delegates to ``LpseDetilPemenangNonTenderParser(...).get_detil()``; the
    call is retried (3 tries, Fibonacci backoff) on server or request errors.

    :return: the parsed value, also kept in ``self.pemenang``
    """
    parser = LpseDetilPemenangNonTenderParser(self._lpse, self.id_paket)
    self.pemenang = parser.get_detil()

    return self.pemenang
requests.exceptions.ConnectionError), 390 | max_tries=3, jitter=None) 391 | def get_jadwal(self): 392 | self.jadwal = LpseDetilJadwalNonTenderParser(self._lpse, self.id_paket).get_detil() 393 | 394 | return self.jadwal 395 | 396 | 397 | class BaseLpseDetilParser(object): 398 | 399 | detil_path = None 400 | 401 | def __init__(self, lpse, id_paket): 402 | self.lpse = lpse 403 | self.id_paket = id_paket 404 | 405 | @backoff.on_exception(backoff.fibo, 406 | (LpseServerExceptions, requests.exceptions.RequestException, 407 | requests.exceptions.ConnectionError), 408 | max_tries=3, jitter=None) 409 | def get_detil(self): 410 | url = self.lpse.url+self.detil_path.format(self.id_paket) 411 | r = self.lpse.session.get( 412 | url, 413 | timeout=self.lpse.timeout, 414 | headers={ 415 | "referer": self.lpse.url 416 | } 417 | ) 418 | 419 | self.lpse.check_error(r) 420 | 421 | return self.parse_detil(r.content) 422 | 423 | @abstractmethod 424 | def parse_detil(self, content): 425 | pass 426 | 427 | @staticmethod 428 | def parse_currency(nilai): 429 | result = ''.join(re.findall(r'([\d+,])', nilai)).replace(',', '.') 430 | try: 431 | return float(result) 432 | except ValueError: 433 | return 0 434 | 435 | 436 | class LpseDetilPengumumanParser(BaseLpseDetilParser): 437 | 438 | detil_path = '/lelang/{}/pengumumanlelang' 439 | 440 | def parse_detil(self, content): 441 | soup = Bs(content, 'html5lib') 442 | 443 | content = soup.find('div', {'class': 'content'}) 444 | table = content.find('table', {'class': 'table-bordered'}).find('tbody') 445 | 446 | return self.parse_table(table) 447 | 448 | def parse_table(self, table): 449 | data = {} 450 | 451 | for tr in table.find_all('tr', recursive=False): 452 | ths = tr.find_all('th', recursive=False) 453 | tds = tr.find_all('td', recursive=False) 454 | 455 | for th, td in zip(ths, tds): 456 | data_key = '_'.join(th.text.strip().split()).lower() 457 | 458 | td_sub_table = td.find('table', recursive=False) 459 | 460 | if td_sub_table and 
def parse_nama_tender(self, element):
    """Split a name cell into its plain text and its ``<span>`` labels.

    Each ``<span>`` found in the cell is read and then removed from the
    element, so the text returned as the name no longer contains the labels.

    :param element: cell element; it is modified (spans are removed)
    :return: tuple ``(name_text, list_of_label_texts)``
    """
    def take_label(span):
        # Read first, then detach, one span at a time.
        caption = span.text.strip()
        span.decompose()
        return caption

    label = [take_label(span) for span in element.find_all('span')]

    return element.text.strip(), label
class LpseDetilHasilEvaluasiParser(BaseLpseDetilParser):
    """Parser for the evaluation result ("hasil evaluasi") page.

    The first table row holds abbreviated column headers which are mapped
    to descriptive keys through ``header_ref``; every following row becomes
    one dict.
    """

    detil_path = '/evaluasi/{}/hasil'
    header_ref = {
        "a": "evaluasi_administrasi",
        "t": "evaluasi_teknis",
        "st": "skor_teknis",
        "p_1": "penawaran",
        "pt": "penawaran_terkoreksi",
        "hn": "hasil_negosiasi",
        "sh": "skor_harga",
        "sa": "skor_akhir",
        "b": "pembuktian_kualifikasi",
        "k": "evaluasi_kualifikasi",
        "sk": "skor_kualifikasi",
        "sb": "skor_pembuktian",
        "h": "evaluasi_harga_biaya",
        "p_2": "pemenang",
        "pk": "pemenang_berkontrak"
    }

    def parse_detil(self, content):
        """Parse the raw HTML of the evaluation page.

        :param content: HTML document as returned by the server
        :return: list of row dicts, or ``None`` when the content div holds
            no table
        :raises AttributeError: when the content div itself is missing
        """
        soup = Bs(content, 'html5lib')
        table = soup.find('div', {'class': 'content'})\
            .find('table')

        if not table:
            return

        header = []
        data = []

        for row_number, tr in enumerate(table.find_all('tr')):
            # only element children are cells; skip whitespace text nodes
            cells = [i for i in tr.children if isinstance(i, bs4.element.Tag)]

            if row_number == 0:
                header = ['_'.join(i.text.strip().split()).lower() for i in cells]

                # the abbreviation "p" is used twice (penawaran and pemenang);
                # disambiguate by position before mapping
                if header.count('p') > 1:
                    first_p_idx = header.index('p')
                    second_p_idx = header.index('p', first_p_idx + 1)
                    header[first_p_idx] = 'p_1'
                    header[second_p_idx] = 'p_2'

                # map header key to reference, unknown keys stay as they are
                header = [self.header_ref.get(key, key) for key in header]
                continue

            children = [self.parse_icon(i) for i in cells]
            data.append(self.parse_children(dict(zip(header, children))))

        return data

    def parse_children(self, children):
        """Convert the raw cell values of one row to typed values in place.

        * keys starting with ``s`` (the score columns) become ``float``,
          ``0.0`` when the cell cannot be converted;
        * the price columns go through ``parse_currency``;
        * the flag columns become ``False`` unless the cell equals ``True``.

        :param children: dict of header key -> raw cell value
        :return: the same dict, updated
        """
        for key, value in children.items():
            if key.startswith('s'):
                try:
                    children[key] = float(value)
                except (TypeError, ValueError):
                    # TypeError: parse_icon returned None for the cell
                    children[key] = 0.0
            elif key in ('penawaran', 'penawaran_terkoreksi', 'hasil_negosiasi'):
                # parse_currency runs a regex and therefore needs text; a
                # cell that parse_icon turned into a number/None has no amount
                children[key] = self.parse_currency(value) if isinstance(value, str) else 0
            elif key in ('evaluasi_harga_biaya', 'pemenang', 'pemenang_berkontrak') \
                    and value != True:  # noqa: E712 - equality keeps 1 untouched
                children[key] = False

        return children

    def parse_nama_npwp(self, peserta):
        """Split ``"<name> - <npwp>"`` at the last `` - `` separator."""
        return str(peserta).rsplit(' - ', maxsplit=1)

    def parse_icon(self, child):
        """Translate an icon cell into a value.

        :param child: cell element
        :return: ``1`` / ``0`` / ``None`` for the check / close / minus
            icons (``None`` as well for any other ``fa-`` icon), ``True``
            when the cell shows ``star.gif``, otherwise the stripped cell
            text
        """
        status = {
            'fa-check': 1,
            'fa-close': 0,
            'fa-minus': None
        }

        markup = str(child)

        # capture only the icon class name so extra classes or attributes
        # after it do not end up in the lookup key
        icon = re.search(r'fa (fa-[\w-]+)', markup)
        if icon:
            return status.get(icon.group(1))
        if re.search(r'star.gif', markup):
            return True
        return child.text.strip()
class LpseDetilJadwalParser(BaseLpseDetilParser):
    """Parser for the schedule ("jadwal") page of a tender."""

    detil_path = '/lelang/{}/jadwal'

    def parse_detil(self, content):
        """Parse the raw HTML of the schedule page.

        The first table in the document is used: its first row supplies
        the column names, every other row becomes one dict.

        :param content: HTML document as returned by the server
        :return: list of row dicts, or ``None`` when there is no table
        """
        table = Bs(content, 'html5lib').find('table')

        if not table:
            return

        rows = table.find_all('tr')
        if not rows:
            return []

        column_names = [
            '_'.join(th.text.strip().split()).lower()
            for th in rows[0].find_all('th')
        ]

        return [
            dict(zip(column_names,
                     [' '.join(td.text.strip().split()) for td in tr.find_all('td')]))
            for tr in rows[1:]
        ]
def check_new_version(timeout=10):
    """Compare the installed PyProc version with the latest release on PyPI.

    :param timeout: seconds to wait for PyPI; without a timeout the request
        could block the program start indefinitely
    :return: tuple ``(status, current_version, pypi_version)`` where
        ``status`` is ``True`` when the two version strings differ
    :raises requests.exceptions.RequestException: on connection problems,
        timeout or a non-success HTTP status
    """
    resp = requests.get('https://pypi.org/pypi/pyproc/json', timeout=timeout)
    # fail with a clear HTTP error instead of a JSON/KeyError on error pages
    resp.raise_for_status()

    current_version = pyproc.__version__
    pypi_version = resp.json()['info']['version']
    status = current_version != pypi_version

    return status, current_version, pypi_version
def parse_tahun_anggaran(self, tahun_anggaran):
    """
    Turn the budget-year argument into a sorted list of distinct years.

    :param tahun_anggaran: string argument; ``all`` (any case), a single
        year, a range ``X-Y`` or a comma separated mix such as ``A,B,X-Z``.
        Whitespace is ignored.
    :return: ``[None]`` for ``all``, otherwise the sorted list of years
    :raises DownloaderContextException: when a part is not numeric or a
        year is not above 2000 and at most the current year plus five
    """
    if tahun_anggaran.lower().strip() == 'all':
        return [None]

    compact = re.sub(r'\s+', '', tahun_anggaran)
    collected = set()

    # every comma separated part is either one year or a dash separated range
    for part in compact.split(','):
        try:
            bounds = [int(piece) for piece in part.split('-')]

            for tahun in range(min(bounds), max(bounds) + 1):
                if not 2000 < tahun <= datetime.now().year + 5:
                    raise DownloaderContextException(text.ERROR_CTX_RANGE_TAHUN.format(datetime.now().year + 5))
                collected.add(tahun)
        except ValueError:
            raise DownloaderContextException(text.ERROR_CTX_TAHUN_ANGGARAN)

    if not collected:
        raise DownloaderContextException(text.ERROR_CTX_TAHUN_ANGGARAN)

    return sorted(collected)
class LpseIndex:
    """One row of the INDEX_PAKET table exposed as attributes."""

    def __init__(self, kwargs):
        """
        :param kwargs: dict with the keys ``row_id``, ``id_paket``,
            ``jenis_paket``, ``kategori_tahun_anggaran``, ``status`` and
            ``detail`` (JSON text or ``None``)
        """
        self.row_id = kwargs['row_id']
        self.id_paket = kwargs['id_paket']
        self.jenis_paket = kwargs['jenis_paket']
        self.kategori_tahun_anggaran = kwargs['kategori_tahun_anggaran']
        self.status = kwargs['status']
        self.detail = self.parse_detail(kwargs['detail'])

    @staticmethod
    def parse_detail(detail):
        """Decode the stored JSON detail.

        :param detail: JSON text, or ``None`` / empty for a row whose
            detail has not been stored
        :return: the decoded value, or ``{}`` when there is nothing to
            decode or the text is not valid JSON
        """
        if not detail:
            return {}

        try:
            return json.loads(detail)
        except (TypeError, ValueError) as e:
            # json.JSONDecodeError is a ValueError: a damaged row must not
            # abort the whole run, but it should not go unnoticed either
            logging.warning("Detail paket tidak dapat dibaca sebagai JSON: {}".format(e))
            return {}

    def __str__(self):
        return str(self.__dict__)
def get_index_db(self, filename):
    """
    Open the SQLite index database of one host and prepare its table.

    The database file is ``<filename.name>.idx`` in the current working
    directory; its path is stored in ``self.db_file``.  When resuming and
    the existing index already holds rows, the connection is returned as
    it is.  Otherwise INDEX_PAKET is dropped and created again with the
    columns:
        - ROW_ID (unique primary key)
        - ID_PAKET
        - JENIS_PAKET
        - KATEGORI_TAHUN_ANGGARAN
        - STATUS (default 0)
        - DETAIL
    :param filename: path object, only its ``name`` is used
    :return: SQLite connection object
    """
    self.db_file = Path.cwd() / (filename.name + ".idx")
    connection = sqlite3.connect(str(self.db_file), check_same_thread=False)

    if self.ctx.resume and self.__check_index_db(connection):
        logging.info("{} - skip db init, melanjutkan proses".format(self.lpse_host.url))
        return connection

    logging.debug("Generate index database: {}".format(self.db_file.name))
    logging.debug("Create index table")

    # executed in order; the first failure skips the remaining statements
    statements = (
        "DROP TABLE IF EXISTS INDEX_PAKET",
        """CREATE TABLE INDEX_PAKET
        (
            ROW_ID varchar(100) unique primary key,
            ID_PAKET VARCHAR(50),
            JENIS_PAKET VARCHAR(32),
            KATEGORI_TAHUN_ANGGARAN varchar (100),
            STATUS int default 0,
            DETAIL text
        );""",
        "CREATE INDEX INDEX_PAKET_KATEGORI_TAHUN_ANGGARAN_IDX ON INDEX_PAKET(KATEGORI_TAHUN_ANGGARAN);",
        "CREATE INDEX INDEX_PAKET_ID_PAKET_IDX ON INDEX_PAKET(ID_PAKET);",
        "CREATE INDEX INDEX_PAKET_JENIS_PAKET ON INDEX_PAKET(JENIS_PAKET);",
        "CREATE INDEX INDEX_PAKET_STATUS_IDX ON INDEX_PAKET(STATUS);",
    )

    try:
        for statement in statements:
            connection.execute(statement)
    except sqlite3.OperationalError as e:
        if 'INDEX_PAKET already exists' not in str(e):
            raise e

    connection.commit()

    return connection
def start(self):
    """
    Download the package index page by page into the index database.

    Nothing is downloaded when resuming on top of a usable previous index.
    For every budget year the total number of packages is requested first,
    then the pages are fetched in chunks of ``ctx.chunk_size`` and inserted
    (duplicates are ignored), pausing ``ctx.index_download_delay`` seconds
    after each page.
    :return:
    """
    ctx = self.ctx

    if ctx.resume and self.db_status_for_resume:
        return

    for tahun in ctx.tahun_anggaran:
        total = self.get_total_package(tahun=tahun)
        # ceiling division: pages needed to cover `total` rows
        batch_total = -(-total // ctx.chunk_size)
        data_count = 0
        tahun_label = tahun if tahun is not None else 'ALL'

        for batch in range(batch_total):
            data = self.lpse.get_paket(
                jenis_paket=self.get_jenis_paket(),
                start=batch * ctx.chunk_size,
                length=ctx.chunk_size,
                kategori=ctx.kategori,
                search_keyword=ctx.keyword,
                nama_penyedia=ctx.nama_penyedia,
                data_only=True,
                tahun=tahun,
            )

            # an empty page means the server has nothing more for this year
            if not data:
                break

            self.db.executemany(
                "INSERT OR IGNORE INTO INDEX_PAKET VALUES(?, ?, ?, ?, ?, ?)",
                self.convert_index_for_db(data),
            )
            self.db.commit()

            data_count += len(data)
            logging.info(
                "{host} - TA {tahun} - Indexing halaman ke-{batch}.".format(
                    host=self.lpse_host.url,
                    batch=batch + 1,
                    tahun=tahun_label,
                )
            )

            sleep(ctx.index_download_delay)
def get_index(self):
    """
    Yield every index row that still has STATUS = 0.

    All matching rows are fetched at once and passed one by one through
    ``self.index_factory`` before being yielded.
    :return: generator of the objects built by ``index_factory``
    """
    logging.debug("[SQL] get index from database")
    cursor = self.db.execute("SELECT * FROM INDEX_PAKET WHERE STATUS = 0")
    pending = cursor.fetchall()

    for raw in pending:
        entry = self.index_factory(cursor, raw)
        logging.debug("row data {}".format(entry))
        yield entry
def update_detail(self, lpse_index):
    """
    Store the downloaded detail of one package and mark its row as done.

    Runs under ``self.lock``: the detail is serialised to JSON from
    ``lpse_index.detail.todict()``, written to the row identified by
    ``lpse_index.row_id`` together with STATUS = 1, and committed.
    :param lpse_index: index entry carrying ``row_id`` and ``detail``
    :return:
    """
    with self.lock:
        logging.debug("[DETAIL DOWNLOADER] update detail data {}".format(lpse_index))
        database = self.index_downloader.db
        payload = json.dumps(lpse_index.detail.todict())
        database.execute(
            "UPDATE INDEX_PAKET SET DETAIL = ?, STATUS = 1 WHERE ROW_ID = ?",
            (payload, lpse_index.row_id)
        )
        database.commit()
class Exporter:
    """Write the stored package details of one host to a CSV or JSON file."""

    # (CSV column name, key in the stored detail dict) for the trailing
    # JSON-encoded columns.  Header and row cells are both generated from
    # this one table so their order cannot drift apart.
    _JSON_COLUMNS = (
        ('hasil_evaluasi', 'hasil'),
        ('pemenang', 'pemenang'),
        ('pemenang_berkontrak', 'pemenang_berkontrak'),
        ('jadwal', 'jadwal'),
        ('peserta', 'peserta'),
    )

    def __init__(self, index_downloader):
        """
        :param index_downloader: object providing ``db``, ``lpse_host``,
            ``ctx`` and ``index_factory``
        """
        self.index_downloader = index_downloader

    def get_detail(self):
        """
        Query the details to export from the index database.
        :return: generator yielding the ``detail`` of every row with STATUS = 1
        """
        logging.info("{} - Export Data".format(self.index_downloader.lpse_host.url))
        result = self.index_downloader.db.execute("SELECT * from INDEX_PAKET WHERE STATUS = 1")
        for data in result.fetchall():
            data = self.index_downloader.index_factory(result, data)
            yield data.detail

    def get_file_obj(self, ext):
        """
        Build the path of the export file.
        :param ext: file extension without the leading dot
        :return: path ``<host filename>.<ext>`` in the current working directory
        """
        filename = self.index_downloader.lpse_host.filename.name + '.' + ext

        return Path.cwd() / filename

    def to_csv(self, delimiter):
        """
        Export the details to a CSV file.

        Each row holds the host url, the package id, the announcement
        fields and finally the JSON-encoded nested data in the order given
        by the header row.
        :param delimiter: CSV field delimiter
        :return:
        """
        is_tender = not self.index_downloader.ctx.non_tender
        header = [
            'id_paket',
            'nama_tender',
            'tanggal_pembuatan',
            'tahap_tender_saat_ini',
            'k/l/pd',
            'satuan_kerja',
            'jenis_pengadaan',
            'metode_pengadaan',
            'tahun_anggaran',
            'nilai_pagu_paket',
            'nilai_hps_paket',
            'jenis_kontrak',
            'kualifikasi_usaha',
            'peserta_tender',
            'khusus_pelaku_usaha_oap',
            'lokasi_pekerjaan',
            'label_paket',
        ]

        if not is_tender:
            # the non-tender announcement uses different labels for these
            header[1] = 'nama_paket'
            header[3] = 'tahap_paket_saat_ini'
            header[-4] = 'peserta_non_tender'

        # id_paket comes from the detail itself, the rest from 'pengumuman'
        pengumuman_keys = header[1:]
        json_data_header = [column for column, _ in self._JSON_COLUMNS]

        with self.get_file_obj('csv').open('w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f, delimiter=delimiter)
            writer.writerow(['url'] + header + json_data_header)

            for item in self.get_detail():
                pengumuman = item.get('pengumuman')

                if pengumuman:
                    base_data = [pengumuman.get(key) for key in pengumuman_keys]
                    # label_paket and lokasi_pekerjaan are lists
                    base_data[-1] = json.dumps(base_data[-1])
                    base_data[-2] = json.dumps(base_data[-2])
                else:
                    base_data = [None] * len(pengumuman_keys)

                writer.writerow(
                    [self.index_downloader.lpse_host.url, item.get('id_paket')]
                    + base_data
                    + [json.dumps(item.get(key)) for _, key in self._JSON_COLUMNS]
                )

    def to_json(self):
        """
        Export the details to a JSON file holding one array of detail objects.

        Items are written one at a time; the separator is emitted before
        every item but the first, so an export without rows yields ``[]``.
        :return:
        """
        with self.get_file_obj('json').open('w', encoding='utf-8') as f:
            f.write("[")
            for position, item in enumerate(self.get_detail()):
                if position:
                    f.write(",")
                f.write(json.dumps(item))
            f.write("]")
650 | -h, --help : menampilkan pesan bantuan 651 | -k, --keyword : filter pencarian index paket berdasarkan kata kunci 652 | -t, --tahun-anggaran : filter download detail berdasarkan tahun anggaran, 653 | format X-Y atau X;Y;Z 654 | --kategori : filter pencarian index paket berdasarkan kategori 655 | --nama-penyedia : filter pencarian index paket berdasarkan nama penyedia 656 | -c, --chunk-size : jumlah index per-halaman yang diunduh dalam satu iterasi 657 | -w, --workers : jumlah workers yang berjalan secara paralel untuk mengunduh detail paket 658 | -x, --timeout : waktu timeout respon dari server dalam detik 659 | -n, --non-tender : flag untuk melakukan pengunduhan data paket pengadaan langsung 660 | -d, --index-download-delay : waktu delay untuk setiap iterasi halaman index dalam detik 661 | -k, --keep-workdir : tidak menghapus working direktori dari downloader 662 | -f, --force : menjalankan program tanpa memperhatikan cache yang sudah ada sebelumnya 663 | --clear : membersihkan folder cache di direktori home 664 | LPSE_HOST : host LPSE atau file teks berisi daftar host LPSE. 665 | Jika terdapat file teks dengan nama yang sama dengan hostname LPSE, prioritas 666 | pertama dari program adalah membaca file. 
667 | :return: Lpse Downloader Context 668 | """ 669 | 670 | # if there is no argument, show interactive menu 671 | if len(sys_args) == 0: 672 | sys_args = self.get_args_from_interactive_menu() 673 | 674 | parser = argparse.ArgumentParser() 675 | parser.add_argument('lpse_host', type=str, help=text.HELP_LPSE_HOST) 676 | parser.add_argument('-k', '--keyword', type=str, default="", help=text.HELP_KEYWORD) 677 | parser.add_argument('-t', '--tahun-anggaran', type=str, default="{}".format(datetime.now().year), 678 | help=text.HELP_TAHUN_ANGGARAN) 679 | parser.add_argument('--kategori', 680 | choices=[ 681 | "PENGADAAN_BARANG", 682 | "JASA_KONSULTANSI_BADAN_USAHA_NON_KONSTRUKSI", 683 | "PEKERJAAN_KONSTRUKSI", 684 | "JASA_LAINNYA", 685 | "JASA_KONSULTANSI_PERORANGAN", 686 | "JASA_KONSULTANSI_BADAN_USAHA_KONSTRUKSI", 687 | None 688 | ], 689 | help=text.HELP_KATEGORI, default=None) 690 | parser.add_argument('--nama-penyedia', type=str, default=None, help=text.HELP_PENYEDIA) 691 | parser.add_argument('-c', '--chunk-size', type=int, default=100, help=text.HELP_CHUNK_SIZE) 692 | parser.add_argument('-w', '--workers', type=int, default=8, help=text.HELP_WORKERS) 693 | parser.add_argument('-x', '--timeout', type=int, default=30, help=text.HELP_TIMEOUT) 694 | parser.add_argument('-n', '--non-tender', action='store_true', help=text.HELP_NONTENDER) 695 | parser.add_argument('-d', '--index-download-delay', type=int, default=1, help=text.HELP_INDEX_DOWNLOAD_DELAY) 696 | parser.add_argument('-o', '--output-format', choices=['json', 'csv'], default='csv', help=text.HELP_OUTPUT) 697 | parser.add_argument('--keep-index', action='store_true', help=text.HELP_KEEP) 698 | parser.add_argument('-r', '--resume', action='store_true', help=text.HELP_RESUME) 699 | parser.add_argument('-s', '--separator', type=str, default=";", help=text.HELP_CSV_SEPARATOR) 700 | parser.add_argument('--log', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], default='INFO', 701 | help=text.HELP_LOG_LEVEL) 
702 | 703 | args = parser.parse_args(sys_args) 704 | 705 | set_up_log(args.log) 706 | 707 | logging.debug('Parsing context') 708 | 709 | self.ctx = DownloaderContext(args) 710 | 711 | return self.ctx 712 | 713 | def start(self): 714 | for lpse_host in self.ctx.lpse_host_list: 715 | if not lpse_host.is_valid: 716 | logging.error("{} - {}".format(lpse_host.url, lpse_host.error)) 717 | continue 718 | 719 | try: 720 | index_downloader = IndexDownloader(self.ctx, lpse_host) 721 | index_downloader.start() 722 | except Exception as e: 723 | logging.error("{} - Index Downloader Error {} {}".format(lpse_host.url, e.__class__, str(e))) 724 | continue 725 | 726 | try: 727 | detail_downloader = DetailDownloader(index_downloader) 728 | detail_downloader.start() 729 | except Exception as e: 730 | logging.error("{} - Detail Downloader Error {} {}".format(lpse_host.url, e.__class__, str(e))) 731 | continue 732 | 733 | exporter = Exporter(index_downloader) 734 | 735 | if self.ctx.output_format == 'json': 736 | exporter.to_json() 737 | elif self.ctx.output_format == 'csv': 738 | exporter.to_csv(delimiter=self.ctx.separator) 739 | 740 | qa = QualityAssurance(index_downloader) 741 | total, success, fail = qa.check() 742 | 743 | with open('statistic.txt', 'a') as f: 744 | f.write("{} total={} success={} fail={} tahun={}\n".format( 745 | lpse_host.url, total, success, fail, self.ctx.tahun_anggaran 746 | )) 747 | 748 | if total == 0: 749 | logging.info("Proses selesai, tidak ada data yang ditemukan.") 750 | elif fail == 0: 751 | logging.info("Proses selesai: {}/{} ({:,.2f}%) terunduh".format(success, total, success/total*100)) 752 | else: 753 | logging.error("Proses gagal: {}/{} ({:,.2f}%).".format(fail, total, fail/total*100)) 754 | logging.info("Jalankan perintah dengan parameter --resume / -r untuk mengunduh ulang paket yang gagal") 755 | 756 | if not index_downloader.ctx.keep_index and fail == 0: 757 | logging.info("{} - membersihkan direktori".format(lpse_host.url)) 758 | 
def main():
    """Command line entry point.

    Installs the SIGINT/SIGTERM handlers, prints the banner, parses the
    command line into the downloader context, reports a newer PyProc
    release when one is found and then either downloads the LPSE host
    list (first argument ``daftarlpse``) or starts the downloader.
    """
    import sys

    IWillFindYouAndIWillKillYou()

    print(text.INFO)

    downloader = Downloader()
    downloader.get_ctx(sys.argv[1:])

    # The update notice is best-effort: PyPI being unreachable or answering
    # unexpectedly must not prevent the actual download from running.
    try:
        status, current, new = check_new_version()
    except Exception as e:
        logging.warning(f"Gagal memeriksa versi terbaru PyProc: {e}")
    else:
        if status:
            logging.info(f"Anda menggunakan PyProc versi {current}, "
                         f"tersedia versi baru {new}. "
                         f"Mohon untuk memperbarui aplikasi.")

    try:
        if len(sys.argv) > 1 and sys.argv[1] == 'daftarlpse':
            pyproc.utils.download_host(logging)
            # sys.exit instead of the site-provided exit() helper, which is
            # not guaranteed to exist in every runtime
            sys.exit(0)
        else:
            downloader.start()
    except Exception as e:
        logging.error(f"Terjadi galat {e}")
    finally:
        del downloader