├── bob.txt
├── .dockerignore
├── localConfig.json
├── Dockerfile
├── .github
│   └── workflows
│       └── docker-build.yml
├── requirements.txt
├── .gitignore
├── README.md
├── Transaction.py
├── Launcher.py
├── App.py
├── Validator.py
└── Scraper.py
/bob.txt:
--------------------------------------------------------------------------------
1 | 
2 | 
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | .gitignore
--------------------------------------------------------------------------------
/localConfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "ExordeApp": {
 3 |         "ERCAddress": "",
 4 |         "MainERCAddress": "",
 5 |         "Updated": 0,
 6 |         "SendCountryInfo": 1,
 7 |         "lastInfo": "",
 8 |         "lastUpdate": "1.3.5b"
 9 |     }
10 | }
11 | 
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:3.10-slim
 2 | 
 3 | #RUN adduser --uid 2000 --gecos "" --disabled-password --quiet exorde_user
 4 | #COPY --chown=exorde_user:exorde_user Launcher.py requirements.txt localConfig.json bob.txt /
 5 | COPY Launcher.py requirements.txt localConfig.json bob.txt /
 6 | RUN apt-get update \
 7 |     && apt-get install --no-install-recommends --yes build-essential procps \
 8 |     && apt-get clean \
 9 |     && rm -rf /var/lib/apt/lists/*
10 | RUN pip install --no-cache-dir -r requirements.txt
11 | #USER exorde_user
12 | 
13 | ENTRYPOINT ["python", "-u", "./Launcher.py"]
--------------------------------------------------------------------------------
/.github/workflows/docker-build.yml:
--------------------------------------------------------------------------------
 1 | name: Build a container image of Exorde CLI
 2 | 
 3 | on:
 4 |   workflow_dispatch:
 5 |   push:
 6 |     branches:
 7 |       - main
 8 | 
 9 | jobs:
10 |   build-deploy:
11 |     name: Build and deploy
12 |     runs-on: ubuntu-latest
13 |     steps:
14 |       - name: Checkout repository
15 |         uses: actions/checkout@v3
16 |         with:
17 |           fetch-depth: 0
18 |       - name: Get commit's short SHA
19 |         id: vars
20 |         run: echo "sha_short=$(git rev-parse --short HEAD)" >> "$GITHUB_OUTPUT"
21 |       - name: Log into Docker Hub
22 |         uses: docker/login-action@v2
23 |         with:
24 |           username: ${{ secrets.DOCKERHUB_LOGIN }}
25 |           password: ${{ secrets.DOCKERHUB_PASSWORD }}
26 |       - name: Set up QEMU for ARM build
27 |         uses: docker/setup-qemu-action@v2
28 |       - name: Setup Docker buildx
29 |         uses: docker/setup-buildx-action@v2
30 |       - name: Build and push Docker image
31 |         id: build-and-push
32 |         uses: docker/build-push-action@v3
33 |         with:
34 |           context: .
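          # QEMU and buildx (set up in the steps above) let this single job build and push the multi-platform images listed below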
35 | push: true 36 | platforms: | 37 | linux/amd64 38 | linux/arm64 39 | tags: | 40 | exordelabs/exorde-cli:${{ steps.vars.outputs.sha_short }} 41 | exordelabs/exorde-cli:latest 42 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | web3==5.31.1 2 | aiohttp==3.8.3 3 | aiosignal==1.2.0 4 | appdirs==1.4.4 5 | async-timeout==4.0.* 6 | attrs==22.1.0 7 | base58==2.1.1 8 | beautifulsoup4==4.11.* 9 | bitarray==2.6.0 10 | boto3==1.26.* 11 | botocore==1.29.3 12 | bs4==0.0.1 13 | certifi==2022.9.* 14 | charset-normalizer==2.1.* 15 | click==8.1.3 16 | cssselect==1.2.0 17 | cytoolz==0.12.0 18 | dateparser==1.1.3 19 | demjson3==3.0.6 20 | eth-abi==2.2.0 21 | eth-account==0.5.9 22 | eth-hash==0.5.0 23 | eth-keyfile==0.5.1 24 | eth-keys==0.3.4 25 | eth-rlp==0.2.1 26 | eth-typing==2.3.0 27 | eth-utils==1.9.5 28 | facebook-scraper==0.2.59 29 | fake-useragent==0.1.* 30 | fasttext==0.9.* 31 | fasttext-langdetect==1.0.3 32 | filelock==3.8.0 33 | frozenlist==1.3.1 34 | geographiclib==1.52 35 | geopy==2.2.0 36 | hexbytes==0.3.0 37 | idna==3.4 38 | importlib-metadata==5.0.* 39 | ipfshttpclient==0.8.0a2 40 | iso-639==0.4.5 41 | jellyfish==0.9.* 42 | jmespath==1.0.1 43 | jsonschema==4.17.* 44 | lru-dict==1.1.8 45 | lxml==4.9.1 46 | multiaddr==0.0.9 47 | multidict==6.0.* 48 | netaddr==0.8.0 49 | networkx==2.8.* 50 | numpy==1.* 51 | pandas==1.* 52 | parse==1.19.0 53 | parsimonious==0.8.1 54 | pip==22.3 55 | protobuf==3.19.5 56 | pybind11==2.10.1 57 | pycryptodome==3.15.0 58 | pyee==8.2.2 59 | pyppeteer 60 | pyquery==1.4.3 61 | pyrsistent==0.19.2 62 | PySocks==1.7.1 63 | python-dateutil==2.8.2 64 | pytz==2022.6 65 | pytz-deprecation-shim==0.1.0.post0 66 | regex==2022.3.2 67 | requests==2.28.* 68 | requests-file==1.5.1 69 | requests-html==0.10.0 70 | rlp==2.0.1 71 | s3transfer==0.6.0 72 | scipy==1.9.* 73 | segtok==1.5.11 74 | setuptools==65.5.0 75 | six==1.16.0 76 | snscrape==0.4.3.* 77 | soupsieve==2.3.2.post1 78 | tabulate==0.9.0 79 | tldextract==3.4.0 80 | toolz==0.12.0 81 | tqdm==4.64.* 82 | tzdata==2022.6 83 | tzlocal==4.2 84 | urllib3==1.26.12 85 | varint==1.0.2 86 | w3lib==2.0.1 87 | websockets 88 | wget==3.2 89 | wheel==0.37.1 90 | yake==0.4.8 91 | yarl==1.8.1 92 | zipp==3.10.0 93 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | .idea/
161 | 
162 | # ignoring .git folder
163 | .git/
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Exorde Participation Module CLI
  2 | 
  3 | The full documentation of the Exorde Participation Module CLI is available at https://docs.exorde.network.
  4 | 
  5 | ## Instructions
  6 | 
  7 | You have several ways to run the Exorde CLI:
  8 | 
  9 | - Run from sources inside a virtual Python environment
 10 | - Run from a Docker image
 11 | 
 12 | Exorde CLI does not come with a GUI; it is aimed at advanced users who want to run it inside a terminal. The installation process assumes that users running Exorde CLI are familiar with the command line.
 13 | 
 14 | Using the container image is the recommended way to run Exorde CLI: it avoids dependency issues, handles automatic restarts in case of failure or application update, and makes it easier to run multiple instances of the application.
 15 | 
 16 | ## Requirements
 17 | 
 18 | - Windows 8.1/10/11, Linux, or macOS
 19 | - 4 GB RAM
 20 | - 2 CPU cores
 21 | - 1 GB storage (HDD or SSD)
 22 | 
 23 | ## Quickstart using Python and Conda on Linux/macOS
 24 | 
 25 | 1. Follow the [Conda documentation](https://docs.conda.io/projects/conda/en/latest/user-guide/install/index.html#regular-installation) to install it on your system.
 26 | 2. Download and unzip the latest version of Exorde CLI:
 27 | ```bash
 28 | wget https://github.com/exorde-labs/ExordeModuleCLI/archive/refs/heads/main.zip \
 29 |   --output-document=ExordeModuleCLI.zip \
 30 |   && unzip ExordeModuleCLI.zip \
 31 |   && rm ExordeModuleCLI.zip \
 32 |   && mv ExordeModuleCLI-main ExordeModuleCLI
 33 | ```
 34 | 3. Go to the root of the Exorde CLI folder:
 35 | ```bash
 36 | cd ExordeModuleCLI
 37 | ```
 38 | 4. Create and activate a new Conda environment with Python 3.9 as the environment executable (`exorde-env` is an example name):
 39 | ```bash
 40 | conda create --name exorde-env python=3.9
 41 | conda activate exorde-env
 42 | ```
 43 | 5. Upgrade pip:
 44 | ```bash
 45 | pip install --upgrade pip
 46 | ```
 47 | 6. Install the required packages:
 48 | ```bash
 49 | pip install -r requirements.txt
 50 | ```
 51 | 7. Run the program:
 52 | 
 53 | ```bash
 54 | python Launcher.py -m <main_ethereum_address> -l <logging_level>
 55 | ```
 56 | 
 57 | Usage example:
 58 | 
 59 | ```bash
 60 | python Launcher.py -m 0x0F67059ea5c125104E46B46769184dB6DC405C42 -l 2
 61 | ```
 62 | 
 63 | For more detailed information, please read the [full documentation](https://docs.exorde.network).
 64 | 
 65 | ## Quickstart using Docker on a Linux VPS
 66 | 
 67 | 1. Install [Docker](https://docs.docker.com/engine/install/).
 68 | 2. Run the program in the background with auto-restart:
 69 | 
 70 | ```bash
 71 | docker run \
 72 |   -d \
 73 |   --restart unless-stopped \
 74 |   --pull always \
 75 |   --name <container_name> \
 76 |   exordelabs/exorde-cli \
 77 |   -m <main_ethereum_address> \
 78 |   -l <logging_level>
 79 | ```
 80 | 
 81 | Usage example:
 82 | 
 83 | ```bash
 84 | docker run \
 85 |   -d \
 86 |   --restart unless-stopped \
 87 |   --pull always \
 88 |   --name exorde-cli \
 89 |   exordelabs/exorde-cli \
 90 |   -m 0x0F67059ea5c125104E46B46769184dB6DC405C42 \
 91 |   -l 2
 92 | ```
 93 | 
 94 | For more detailed information, please read the [full documentation](https://docs.exorde.network).
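Once the container is running in the background, you can follow its console output (the same logs described in the "When running" section below) with `docker logs`, assuming the container name `exorde-cli` from the example above:

```bash
docker logs --follow exorde-cli
```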
 95 | 
 96 | ## How to update the Docker image
 97 | 
 98 | **Note: Exorde CLI has an auto-update mechanism, so there is normally no need to pull a new Docker image: script files inside the container are updated regularly. Pulling a new image is only useful if the auto-update fails.**
 99 | 
100 | If you are already running Exorde CLI with Docker and want to switch to a newly uploaded image, follow these instructions:
101 | 
102 | 1. Stop and delete all running containers of Exorde CLI:
103 | 
104 | ```bash
105 | docker stop <container_name> && docker rm <container_name>
106 | ```
107 | 
108 | For example, if you are running only one container named "exorde-cli":
109 | 
110 | ```bash
111 | docker stop exorde-cli && docker rm exorde-cli
112 | ```
113 | 
114 | 2. Start new containers:
115 | ```bash
116 | docker run \
117 |   -d \
118 |   --restart unless-stopped \
119 |   --pull always \
120 |   --name <container_name> \
121 |   exordelabs/exorde-cli \
122 |   -m <main_ethereum_address> \
123 |   -l <logging_level>
124 | ```
125 | 
126 | ## When running
127 | 
128 | For example, if you run in Conda mode with `-l 2` (a moderate amount of logs), you should see this in the console:
129 | 
130 | > ⚠ This output is outdated, it will be replaced soon. ⚠
131 | 
132 | ```bash
133 | $ python Launcher.py -m 0x0000000000000000000000000000000000000001 -l 2
134 | Selected logging > Level: 2 . (0 = no logs, 1 = general logs, 2 = validation logs, 3 =
135 | validation + scraping logs, 4 = detailed validation + scraping logs
136 | 
137 | [INITIAL MODULE SETUP] Downloading code modules on decentralized
138 | storage...
139 | Code Sub-Module 1 / 4 Downloading... https://bafybeibuxrjwffjeymrjlkd2r35r5rdlzxuavoeympqgr7xrxor6hp3bh4.ipfs.w3s.link/Transaction.py
140 | Code Sub-Module 2 / 4 Downloading... https://bafybeifqnq76utn767m4qbwd4j2jg6k3ypwcr2do7gkk3b26ooxfmzgc5e.ipfs.w3s.link/Scraper.py
141 | Code Sub-Module 3 / 4 Downloading... https://bafybeibbygfm276hjion7ocaoyp3wlfodszhlba6jy3b3fzd37zawkfbgi.ipfs.w3s.link/Validator.py
142 | Code Sub-Module 4 / 4 Downloading... https://bafybeicdgmxvetbi4yqjztzzroevcfvnwobk6zomsz5nh4lvb3dftyimxa.ipfs.w3s.link/App.py
143 | 
144 | [Init] UPDATING CONFIG [Init] READING CONFIG FILE [Init] Current Config : {'ExordeApp': {'ERCAddress': '', 'MainERCAddress': '', 'Updated': 0, 'SendCountryInfo': 1, 'lastInfo': 'Hello, you are now an Exorder!', 'lastUpdate': '1.3.1'}}
145 | [Init] FIRST WORKER LAUNCH
146 | [Init] New Worker Local Address = 0x4A94c5D4C49597cd889eB569D0Bf4d6e2aC3aE29
147 | [Init] First funding of the worker wallet [Initial Auto Faucet] Top up sFuel & some EXDT to worker address...
148 | [Faucet] selecting Auto-Faucet
149 | 
150 | ...
151 | ```
152 | 
153 | The module is autonomous.
154 | 
155 | ## Spontaneous updates
156 | 
157 | Sometimes, Exorde Labs needs to push an update to the code. The module will detect it and shut itself down.
158 | This is important for the Exorde Network to remain homogeneous, so older versions have to be retired right away.
159 | 
160 | When this happens, the module will print a message and shut down. It has to be restarted manually (see the sketch below for one way to automate this).
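If you run from sources and want the module to come back up automatically after such a shutdown (Docker users already get this behaviour from `--restart unless-stopped`), a small shell wrapper is enough. A minimal sketch, to be run from a shell where the `exorde-env` environment from the quickstart is already activated; replace the example address with your own:

```bash
#!/usr/bin/env bash
# Relaunch Launcher.py whenever it exits, e.g. after a spontaneous self-update.
while true; do
    python Launcher.py -m 0x0F67059ea5c125104E46B46769184dB6DC405C42 -l 2
    echo "Exorde CLI exited; restarting in 10 seconds..."
    sleep 10
done
```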
161 | 
--------------------------------------------------------------------------------
/Transaction.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Tue Sep 20 14:20:53 2022
  4 | 
  5 | @author: florent, mathias
  6 | Exorde Labs
  7 | """
  8 | # NOTE: this module is fetched and exec'd at runtime by Launcher.py, whose namespace
  9 | # provides requests, Web3, Queue, threading and time (no imports of its own here).
 10 | default_gas_amount = 10_000_000
 11 | 
 12 | class ContractManager:
 13 | 
 14 |     def __init__(self, address="", key=""):
 15 | 
 16 |         self._AccountAddress = address
 17 |         self._AccountKey = key
 18 |         self._TransactionManager = TransactionManager(self)
 19 |         self.netConfig = requests.get("https://raw.githubusercontent.com/MathiasExorde/TestnetProtocol-staging/main/NetworkConfig.txt").json()
 20 |         self.w3 = Web3(Web3.HTTPProvider(self.netConfig["_urlTxSkale"]))
 21 | 
 22 | 
 23 |         to = 60
 24 |         self.contracts = requests.get("https://raw.githubusercontent.com/MathiasExorde/TestnetProtocol-staging/main/ContractsAddresses.txt", timeout=to).json()
 25 |         self.abis = dict()
 26 |         #self.abis["AttributeStore"] = requests.get("https://raw.githubusercontent.com/exorde-labs/TestnetProtocol/main/ABIs/worksystems/DataSpotting.sol/AttributeStore.json", timeout=to).json()
 27 |         self.abis["EXDT"] = requests.get("https://raw.githubusercontent.com/MathiasExorde/TestnetProtocol-staging/main/ABIs/daostack/controller/daostack/controller/DAOToken.sol/DAOToken.json", timeout=to).json()
 28 |         self.abis["DataSpotting"] = requests.get("https://raw.githubusercontent.com/MathiasExorde/TestnetProtocol-staging/main/ABIs/DataSpotting.sol/DataSpotting.json", timeout=to).json()
 29 |         #self.abis["DataFormatting"] = requests.get("https://raw.githubusercontent.com/MathiasExorde/TestnetProtocol-staging/main/ABIs/DataFormatting.sol/DataFormatting.json", timeout=to).json()
 30 |         #self.abis["DLL"] = requests.get("https://raw.githubusercontent.com/exorde-labs/TestnetProtocol/main/ABIs/worksystems/DataSpotting.sol/DLL.json", timeout=to).json()
 31 |         #self.abis["IEtherBase"] = requests.get("https://raw.githubusercontent.com/exorde-labs/TestnetProtocol/main/ABIs/worksystems/sfueldistribute.sol/IEtherbase.json", timeout=to).json()
 32 |         self.abis["Reputation"] = requests.get("https://raw.githubusercontent.com/MathiasExorde/TestnetProtocol-staging/main/ABIs/daostack/controller/daostack/controller/Reputation.sol/Reputation.json", timeout=to).json()
 33 |         #self.abis["IRewardManager"] = requests.get("https://raw.githubusercontent.com/exorde-labs/TestnetProtocol/main/ABIs/worksystems/DataSpotting.sol/IRewardManager.json", timeout=to).json()
 34 |         #self.abis["IStakeManager"] = requests.get("https://raw.githubusercontent.com/exorde-labs/TestnetProtocol/main/ABIs/worksystems/DataSpotting.sol/IStakeManager.json", timeout=to).json()
 35 |         #self.abis["RandomAllocator"] = requests.get("https://raw.githubusercontent.com/exorde-labs/TestnetProtocol/main/ABIs/worksystems/RandomAllocator.sol/RandomAllocator.json", timeout=to).json()
 36 |         self.abis["RewardsManager"] = requests.get("https://raw.githubusercontent.com/MathiasExorde/TestnetProtocol-staging/main/ABIs/RewardsManager.sol/RewardsManager.json", timeout=to).json()
 37 |         self.abis["StakingManager"] = requests.get("https://raw.githubusercontent.com/MathiasExorde/TestnetProtocol-staging/main/ABIs/StakingManager.sol/StakingManager.json", timeout=to).json()
 38 |         self.abis["ConfigRegistry"] = requests.get("https://raw.githubusercontent.com/MathiasExorde/TestnetProtocol-staging/main/ABIs/ConfigRegistry.sol/ConfigRegistry.json", timeout=to).json()
 39 |         self.abis["AddressManager"] = \
requests.get("https://raw.githubusercontent.com/MathiasExorde/TestnetProtocol-staging/main/ABIs/AddressManager.sol/AddressManager.json", timeout=to).json() 40 | 41 | def instantiateContract(self, arg: str): 42 | 43 | contract = self.w3.eth.contract(self.contracts[arg], abi=self.abis[arg]["abi"]) 44 | return contract 45 | 46 | def readStake(self): 47 | 48 | sm = self.instantiateContract("StakingManager") 49 | stakeAmount = sm.functions.AvailableStakedAmountOf(self._AccountAddress).call() 50 | if(stakeAmount > Web3.toWei(25, 'ether')): 51 | return True 52 | else: 53 | return False 54 | 55 | def StakeManagement(self, transactManager): 56 | 57 | contract = self.instantiateContract("EXDT") 58 | sm = self.instantiateContract("StakingManager") 59 | 60 | stakeAmount = sm.functions.AvailableStakedAmountOf(self._AccountAddress).call() 61 | stakeAllocated = sm.functions.AllocatedStakedAmountOf(self._AccountAddress).call() 62 | 63 | if(stakeAmount >= 100 ): 64 | return True 65 | else: 66 | 67 | try: 68 | amount = Web3.toWei(100, 'ether') 69 | increment_tx = contract.functions.approve(self.contracts["StakingManager"], amount).buildTransaction( 70 | { 71 | 'from': self._AccountAddress, 72 | 'gasPrice': self.w3.eth.gas_price, 73 | 'nonce': self.w3.eth.get_transaction_count(self._AccountAddress), 74 | } 75 | ) 76 | transactManager.waitingRoom.put((increment_tx, self._AccountAddress, self._AccountKey)) 77 | 78 | amount_check = contract.functions.allowance(self._AccountAddress, self.contracts["StakingManager"]).call() 79 | 80 | 81 | increment_tx = sm.functions.deposit(Web3.toWei(100, 'ether')).buildTransaction( 82 | { 83 | 'from': self._AccountAddress, 84 | 'gasPrice': self.w3.eth.gas_price, 85 | 'nonce': self.w3.eth.get_transaction_count(self._AccountAddress), 86 | } 87 | ) 88 | 89 | transactManager.waitingRoom.put((increment_tx, self._AccountAddress, self._AccountKey)) 90 | 91 | increment_tx = sm.functions.Stake(Web3.toWei(100, 'ether')).buildTransaction( 92 | { 93 | 'from': self._AccountAddress, 94 | 'gasPrice': self.w3.eth.gas_price, 95 | 'nonce': self.w3.eth.get_transaction_count(self._AccountAddress), 96 | } 97 | ) 98 | transactManager.waitingRoom.put((increment_tx, self._AccountAddress, self._AccountKey)) 99 | time.sleep(30) 100 | except Exception as e: 101 | pass 102 | return True 103 | 104 | 105 | 106 | class TransactionManager(): 107 | def __init__(self, cm): 108 | self.netConfig = requests.get("https://raw.githubusercontent.com/MathiasExorde/TestnetProtocol-staging/main/NetworkConfig.txt").json() 109 | self.w3 = Web3(Web3.HTTPProvider(self.netConfig["_urlSkale"])) 110 | self.w3Tx = Web3(Web3.HTTPProvider(self.netConfig["_urlTxSkale"])) 111 | self.waitingRoom = Queue() 112 | self.waitingRoom_VIP = Queue() 113 | self.run = True 114 | self.last_block = self.w3.eth.get_block('latest')["number"]-1 115 | self.cm = cm 116 | x = threading.Thread(target=self.SendTransactions) 117 | x.daemon = True 118 | x.start() 119 | 120 | def SendTransactions(self): 121 | 122 | while True: 123 | if(self.waitingRoom_VIP.qsize() == 0 and self.waitingRoom.qsize() == 0): 124 | time.sleep(5) 125 | pass 126 | 127 | else: 128 | 129 | try: 130 | if(self.waitingRoom_VIP.qsize() != 0): 131 | for k_sending_trials in range(3): 132 | try: 133 | time.sleep(1+k_sending_trials*3.5) 134 | increment_tx = self.waitingRoom_VIP.get() 135 | previous_nounce = self.w3.eth.get_transaction_count(increment_tx[1]) 136 | 137 | increment_tx[0]["nonce"] = previous_nounce 138 | 139 | gas = default_gas_amount 140 | try: 141 | gasEstimate = 
self.w3.eth.estimate_gas(increment_tx[0])*1.5 142 | if gasEstimate < 100_000: 143 | gas = gasEstimate + 500_000 144 | elif gasEstimate < 1_500_000: 145 | gas = gasEstimate + 2_000_000 146 | except Exception as e: 147 | if detailed_validation_printing_enabled: 148 | print("[TRANSACTION MANAGER] Gas estimation failed: ",e) 149 | 150 | increment_tx[0]["gas"] = int(round(int(gas),0)) 151 | 152 | if detailed_validation_printing_enabled: 153 | print("[TRANSACTION MANAGER] Gas = ",increment_tx[0]["gas"]) 154 | print("[TRANSACTION MANAGER] tx =>",increment_tx) 155 | 156 | # SIGN TRANSACTION 157 | tx_create = self.w3.eth.account.sign_transaction(increment_tx[0], increment_tx[2]) 158 | 159 | # SEND RAW TRANSACTION VIA THE TX ENDPOINT 160 | tx_hash = self.w3Tx.eth.send_raw_transaction(tx_create.rawTransaction) 161 | 162 | time.sleep(2) 163 | for i in range (10): 164 | time.sleep(i*1.5+1) 165 | # WAIT FOR NEW NOUNCE BY READING PROXY 166 | current_nounce = self.w3.eth.get_transaction_count(increment_tx[1]) 167 | if(current_nounce > previous_nounce): 168 | # found a new transaction because account nounce has increased 169 | break 170 | 171 | # WAIT FOR TX RECEIPT 172 | tx_receipt = self.w3.eth.wait_for_transaction_receipt(tx_hash, timeout=10, poll_latency = 3) 173 | try: 174 | tx_gasUsed = tx_receipt['gasUsed'] 175 | tx_status = int(tx_receipt['status']) 176 | print("\nTriggerValidation ",n_iter," gas limit = ",gas) 177 | tx_status_str = "Failure" 178 | if tx_status == 1 : 179 | tx_status_str = "Success" 180 | if detailed_validation_printing_enabled: 181 | print("[TRANSACTION MANAGER] Transaction Status = ", tx_status_str, " , Gas used = ",tx_gasUsed) 182 | except Exception as e: 183 | if detailed_validation_printing_enabled: 184 | print("[TRANSACTION MANAGER] Tx Receipt failed : ",e) 185 | 186 | 187 | self.last_block = self.w3.eth.get_block('latest')["number"] 188 | break 189 | except Exception as e: 190 | if detailed_validation_printing_enabled: 191 | print("[TRANSACTION MANAGER] Error : ",e) 192 | pass 193 | 194 | else: 195 | for k_sending_trials in range(2): 196 | try: 197 | time.sleep(1+k_sending_trials*3.5) 198 | increment_tx = self.waitingRoom.get() 199 | 200 | previous_nounce = self.w3.eth.get_transaction_count(increment_tx[1]) 201 | 202 | increment_tx[0]["nonce"] = previous_nounce 203 | gas = default_gas_amount 204 | try: 205 | gasEstimate = self.w3.eth.estimate_gas(increment_tx[0])*1.5 206 | if gasEstimate < 100_000: 207 | gas = gasEstimate + 500_000 208 | elif gasEstimate < 1_500_000: 209 | gas = gasEstimate + 2_000_000 210 | except Exception as e: 211 | if detailed_validation_printing_enabled: 212 | print("[TRANSACTION MANAGER] Gas estimation failed: ",e) 213 | 214 | increment_tx[0]["gas"] = int(round(int(gas),0)) 215 | 216 | if detailed_validation_printing_enabled: 217 | print("[TRANSACTION MANAGER] Gas = ",increment_tx[0]["gas"]) 218 | print("[TRANSACTION MANAGER] tx =>",increment_tx) 219 | 220 | # SIGN TRANSACTION 221 | tx_create = self.w3.eth.account.sign_transaction(increment_tx[0], increment_tx[2]) 222 | 223 | # SEND RAW TRANSACTION VIA THE TX ENDPOINT 224 | tx_hash = self.w3Tx.eth.send_raw_transaction(tx_create.rawTransaction) 225 | 226 | time.sleep(2) 227 | for i in range (10): 228 | time.sleep(i*1.5+1) 229 | # WAIT FOR NEW NOUNCE BY READING PROXY 230 | current_nounce = self.w3.eth.get_transaction_count(increment_tx[1]) 231 | if(current_nounce > previous_nounce): 232 | # found a new transaction because account nounce has increased 233 | break 234 | 235 | # WAIT FOR TX RECEIPT 236 | 
tx_receipt = self.w3.eth.wait_for_transaction_receipt(tx_hash, timeout=10, poll_latency = 3) 237 | try: 238 | tx_gasUsed = tx_receipt['gasUsed'] 239 | tx_status = int(tx_receipt['status']) 240 | print("\nTriggerValidation ",n_iter," gas limit = ",gas) 241 | tx_status_str = "Failure" 242 | if tx_status == 1 : 243 | tx_status_str = "Success" 244 | if detailed_validation_printing_enabled: 245 | print("[TRANSACTION MANAGER] Transaction Status = ", tx_status_str, " , Gas used = ",tx_gasUsed) 246 | except Exception as e: 247 | if detailed_validation_printing_enabled: 248 | print("[TRANSACTION MANAGER] Tx Receipt failed : ",e) 249 | 250 | self.last_block = self.w3.eth.get_block('latest')["number"] 251 | break 252 | except Exception as e: 253 | if detailed_validation_printing_enabled: 254 | print("[TRANSACTION MANAGER] Error : ",e) 255 | pass 256 | except Exception as e: 257 | if detailed_validation_printing_enabled: 258 | print("[TRANSACTION MANAGER] Major Error : ",e) 259 | time.sleep(3) 260 | pass 261 | 262 | 263 | #print("[{}]\t{}\t{}\t\t{}".format(dt.datetime.now(),"TRANSACTION", "import", "LOADED")) 264 | -------------------------------------------------------------------------------- /Launcher.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Jan 24th 2023 4 | Exorde Testnet 5 | 6 | @author: florent, mathias 7 | Exorde Labs 8 | Version = v1.3.5c 9 | """ 10 | 11 | import boto3 12 | from collections import Counter, deque 13 | import csv 14 | import datetime as dt 15 | from datetime import timezone 16 | from dateutil.parser import parse 17 | from eth_account import Account 18 | import facebook_scraper as fb 19 | from functools import partial 20 | from ftlangdetect import detect 21 | detect.eprint = lambda x: None 22 | from geopy.geocoders import Nominatim 23 | import html 24 | # from idlelib.tooltip import Hovertip 25 | from iso639 import languages 26 | import itertools 27 | import json 28 | # import keyboard 29 | # import libcloud 30 | from lxml.html.clean import Cleaner 31 | import numpy as np 32 | from operator import itemgetter 33 | import os 34 | import pandas as pd 35 | from pathlib import Path 36 | import pickle 37 | # from PIL import Image, ImageTk, ImageFile 38 | # from plyer import notification 39 | import pytz 40 | from queue import Queue 41 | import random 42 | from random import randint 43 | import re 44 | import requests 45 | from requests_html import HTML 46 | from requests_html import HTMLSession 47 | from scipy.special import softmax, expit 48 | # import shutils 49 | import snscrape.modules 50 | import string 51 | import sys 52 | import threading 53 | import time 54 | import tldextract 55 | # import transformers 56 | # from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoConfig, TFAutoModelForSequenceClassification 57 | import unicodedata 58 | import urllib.request 59 | import warnings 60 | import web3 61 | from web3 import Web3, HTTPProvider 62 | import webbrowser 63 | import yake 64 | import warnings 65 | warnings.filterwarnings("ignore") 66 | import hashlib 67 | # try: 68 | # import logging, timeit 69 | # logging.basicConfig(level=logging.DEBUG, format="%(message)s") 70 | # except Exception as e: 71 | # print(e) 72 | 73 | import argparse 74 | 75 | 76 | RAM_HOLDER_AMOUNT_base = 736000000 # reserve 512Mb of Memory 77 | ramholder = bytearray(RAM_HOLDER_AMOUNT_base) 78 | 79 | def DownloadSingleIPFSFile(ipfsHash, timeout_=5, max_trials_=2): 80 | ## constants & parameters 81 | 
_headers = { 82 | "user-agent": ( 83 | "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) " 84 | "Chrome/88.0.4324.146 Safari/537.36" 85 | ) 86 | } 87 | for _ in range(max_trials_): 88 | try: 89 | gateways = requests.get("https://raw.githubusercontent.com/exorde-labs/TestnetProtocol/main/targets/ipfs_gateways.txt").text.split("\n")[:-1] 90 | except: 91 | time.sleep(3) 92 | nb_gateways = len(gateways) 93 | content = None 94 | ## download each file after the other 95 | print("\nFetching IPFS file = ",ipfsHash) 96 | isOk = False 97 | # retry all gateways twice, after pause of 10s in between, before giving up on a batch 98 | for trial in range(max_trials_): 99 | _used_timeout = timeout_ * (1+trial) 100 | print("trial n°", trial, "/", max_trials_-1) 101 | ## initialize the gateway loop 102 | gateway_cursor = 0 103 | ### iterate a trial of the download over all gateways we have 104 | for _ in gateways: 105 | _used_gateway = gateways[gateway_cursor] 106 | try: 107 | _endpoint_url = _used_gateway+ipfsHash 108 | print("\tDownload via: ",_endpoint_url) 109 | content = requests.get(_endpoint_url, headers=_headers, stream=False, 110 | timeout=_used_timeout) 111 | try: 112 | content = content.json() 113 | except: 114 | print("\t\t--failed to open the content with json") 115 | content = None 116 | if content is not None: 117 | isOk = True 118 | break 119 | except Exception as e: 120 | gateway_cursor += 1 121 | if gateway_cursor >= nb_gateways: 122 | print("\t----Tried all gateways") 123 | break 124 | ## Break from gateway loop if we got the file 125 | if isOk: 126 | break 127 | time.sleep(0.5) 128 | ## Break from trial loop if we got the file 129 | if isOk: 130 | break 131 | time.sleep(0.3) 132 | return content 133 | 134 | 135 | def SafeURLDownload(URL, timeout_=2, max_trials_=3): 136 | ## constants & parameters 137 | _headers = { 138 | "user-agent": ( 139 | "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) " 140 | "Chrome/88.0.4324.146 Safari/537.36" 141 | ) 142 | } 143 | content = None 144 | ## download each file after the other 145 | isOk = False 146 | # retry all gateways twice, after pause of 10s in between, before giving up on a batch 147 | for trial in range(max_trials_): 148 | _used_timeout = timeout_ * (1+trial) 149 | # print("trial n°",trial,"/",(max_trials_-1)) 150 | ## initialize the gateway loop 151 | gateway_cursor = 0 152 | ### iterate a trial of the download over all gateways we have 153 | try: 154 | _endpoint_url = URL 155 | if general_printing_enabled: 156 | print("\tDownloading... 
", _endpoint_url) 157 | content = requests.get(_endpoint_url, headers=_headers, stream=False, 158 | timeout=_used_timeout) 159 | if content is not None: 160 | isOk = True 161 | break 162 | except Exception as e: 163 | if general_printing_enabled: 164 | print("Fail: ",e) 165 | ## Break from trial loop if we got the file 166 | if isOk: 167 | break 168 | time.sleep(0.3) 169 | return content 170 | 171 | def SelfUpdateProcedure(): 172 | launcher_fp = 'Launcher.py' 173 | try: 174 | req = requests.get("https://raw.githubusercontent.com/exorde-labs/ExordeModuleCLI/main/Launcher.py") 175 | launcher_code_content = req.content 176 | github_launcher_code_text = req.text 177 | if len(github_launcher_code_text) < 100: 178 | raise ValueError('Error fetching a valid Launcher code.') 179 | github_launcher_sig = str(hashlib.md5(launcher_code_content).hexdigest()) 180 | # Open,close, read file and calculate MD5 on its contents 181 | with open(launcher_fp, 'rb') as file_to_check: 182 | # read contents of the file 183 | data = file_to_check.read() 184 | # pipe contents of the file through 185 | local_launcher_sig = str(hashlib.md5(data).hexdigest()) 186 | print("Local version signature = ",local_launcher_sig, " Latest (github) version signature = ",github_launcher_sig) 187 | except Exception as e: 188 | print("Init error: ",e) 189 | 190 | try: 191 | if(local_launcher_sig != github_launcher_sig): 192 | # overwrite Launcher 193 | with open(launcher_fp, 'w+', newline='', encoding='utf-8') as filetowrite: 194 | filetowrite.write(github_launcher_code_text) 195 | print("\n\n*********\nYour Exorde Testnet Module has been updated!\n ---> Please RESTART the program.\nExorde Labs, 2022\n*********") 196 | exit(1) 197 | except Exception as e: 198 | print("Error :",e) 199 | print("\n\n***************************\nA new Version has been released, you need to download the new version (CLI or Docker).\ 200 | \nPlease download the latest code at https://github.com/exorde-labs/ExordeModuleCLI\nStart from a fresh module installation. Thank you.\nExorde Labs, 2022\n***************************") 201 | exit(1) 202 | 203 | ################## ARG PARSING 204 | parser = argparse.ArgumentParser() 205 | 206 | parser.add_argument('-m', '--main-address', help='Main Ethereum Address, which will get all REP & EXDT for this local worker contribution. Exorde Reputation is non-transferable. Correct usage example: -m 0x0F67059ea5c125104E46B46769184dB6DC405C42', required=True) 207 | parser.add_argument('-l', '--logging', help='level of logging in the console: 0 = no logs, 1 = general logs, 2 = validation logs, 3 = validation + scraping logs, 4 = detailed validation + scraping logs (e.g. for troubleshooting)', default = 1) 208 | parser.add_argument('-d', '--debug', nargs='?', help='debug logs', default = 0) 209 | parser.add_argument('-n', '--noloc', nargs='?', help='disable sharing your country info (ONLY) (example: FR, UK, US, SP, etc) for statistics purposes. No personal information is ever sent.', default = 0) 210 | 211 | localization_enabled = True 212 | try: 213 | args = parser.parse_args() 214 | argsdict = vars(args) 215 | main_wallet_ = argsdict['main_address'] 216 | 217 | is_main_wallet_valid = Web3.isAddress(main_wallet_) 218 | if not is_main_wallet_valid: 219 | print( 220 | "[Error] INVALID Main-address argument. 
A valid Ethereum address looks like " 221 | "'0x0F67059ea5c125104E46B46769184dB6DC405C42'" 222 | ) 223 | sys.exit(1) 224 | main_wallet_ = Web3.toChecksumAddress(main_wallet_) 225 | 226 | verbosity_ = int(argsdict['logging']) 227 | if verbosity_ > 0: 228 | 229 | if verbosity_ >= 3: 230 | verbosity_ = 3 231 | print( 232 | "Selected logging Level: ", 233 | verbosity_, 234 | ( 235 | ". (0 = no logs, 1 = general logs, 2 = validation logs, " 236 | "3 = validation + scraping logs, 4 = detailed validation + scraping logs" 237 | ), 238 | ) 239 | 240 | debug_ = int(argsdict['debug']) 241 | if debug_ > 0: 242 | print("******* [DEBUG LOGS ACTIVATED] *******") 243 | 244 | noloc_ = int(argsdict['noloc']) 245 | if noloc_ == 1: 246 | print("[Localization (Country) statistic disabled]") 247 | localization_enabled = False 248 | except: 249 | parser.print_help() 250 | sys.exit(1) 251 | 252 | # 0 = all disabled 253 | general_printing_enabled = False 254 | scrape_printing_enabled = False 255 | validation_printing_enabled = False 256 | detailed_validation_printing_enabled = False 257 | 258 | sys.stderr = open(os.devnull, "w") # silence stderr 259 | 260 | # 1 = general logs only 261 | if verbosity_ == 1: 262 | general_printing_enabled = True 263 | # 2 = validation logs 264 | if verbosity_ == 2: 265 | general_printing_enabled = True 266 | validation_printing_enabled = True 267 | # 3 = validation + scraping logs 268 | if verbosity_ == 3: 269 | general_printing_enabled = True 270 | validation_printing_enabled = True 271 | scrape_printing_enabled = True 272 | # debug log 273 | if debug_ == 1: 274 | general_printing_enabled = True 275 | validation_printing_enabled = True 276 | scrape_printing_enabled = True 277 | detailed_validation_printing_enabled = True 278 | 279 | ################## NETWORK SELECTION 280 | 281 | mainnet_selected = False 282 | testnet_selected = False 283 | potential_testnet = list() 284 | networkSelector_url = "https://raw.githubusercontent.com/exorde-labs/TestnetProtocol/main/targets/NetworkLoadBalancing.json" 285 | 286 | try: 287 | networkSelector = requests.get(networkSelector_url, timeout=30).json() 288 | except Exception as e: 289 | print(e) 290 | print(requests.get(networkSelector_url, timeout=30)) 291 | 292 | mainnet_threshold_high = int(networkSelector["mainnet"]) 293 | 294 | if detailed_validation_printing_enabled: 295 | print("Mainnet selected with probability : ",mainnet_threshold_high," %") 296 | if mainnet_threshold_high > 100: 297 | mainnet_threshold_high = 100 298 | random_number = random.randint(1, 100) 299 | if random_number < mainnet_threshold_high: 300 | mainnet_selected = True 301 | # testnet listing 302 | for network_ in networkSelector: 303 | if network_ != "mainnet": 304 | probability_testnet_selection = int(networkSelector[network_]) 305 | if probability_testnet_selection > 0: 306 | if detailed_validation_printing_enabled: 307 | print("possible testnet choice = ",network_) 308 | potential_testnet.append(network_) 309 | 310 | mainnet_config_github_url = 'https://raw.githubusercontent.com/exorde-labs/TestnetProtocol/main/NetworkConfig.txt' 311 | testnet_config_github_url = 'https://raw.githubusercontent.com/MathiasExorde/TestnetProtocol-staging/main/NetworkConfig.txt' 312 | 313 | if mainnet_selected: 314 | print("\n-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_ TESTNET CHAIN A -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_") 315 | netConfig = requests.get(mainnet_config_github_url, timeout=30).json() 316 | else: 317 | print("\n-*-*--*-*--*-*--*-*--*-*--*-*--*-*--*-*-- 
TESTNET CHAIN B *-*--*-*--*-*--*-*--*-*--*-*--*-*--*-*--*-*--*-*--*-*--") 318 | netConfig = requests.get(testnet_config_github_url, timeout=30).json() 319 | 320 | 321 | ################## NETWORK CONNECTION 322 | 323 | try: 324 | syncnode_count = int(netConfig["SyncNodeCount"]) 325 | except: 326 | print("Error: Could not read sync node count") 327 | 328 | print("Selecting 1/",syncnode_count, " Sync Nodes.") 329 | try: 330 | random_number = random.randint(1, syncnode_count) 331 | sync_node_id = "_urlSkale{}".format(str(random_number)) 332 | print("Sync_node_id = ",sync_node_id) 333 | except Exception as e: 334 | print("Error: could select sync node randomly: ",e) 335 | exit(1) 336 | 337 | selected_provider_ = netConfig[sync_node_id] 338 | print("Selected Read-only Provider = ",selected_provider_) 339 | 340 | w3 = Web3(Web3.HTTPProvider(selected_provider_)) 341 | w3Tx = Web3(Web3.HTTPProvider(netConfig["_urlTxSkale"])) 342 | 343 | ## NETWORK FAILURE MITIGATION: select network if network last block is > 20min 344 | print("Reading latest block info...") 345 | try: 346 | last_block_on_read = w3.eth.get_block('latest') 347 | now = time.time() 348 | duration = last_block_on_read['timestamp']-now 349 | print("Latest block on Read Proxy = ",last_block_on_read["number"]," -> ",round(duration)," seconds ago (",int(duration/60)," min ).") 350 | except Exception as e: 351 | print("Sync Node Error when Reading ...") 352 | duration = 60 353 | if detailed_validation_printing_enabled: 354 | print("Error = ",e) 355 | 356 | if abs(duration) > (20*60): 357 | print("\n*****\nNetwork seems to have stopped block production > 20 min ago.\nRestart later please\n*****") 358 | exit(1) 359 | # random_number = random.randint(1, 100) 360 | # if random_number < probability_testnet_selection: 361 | # testnet_selected = True 362 | # print("testnet selected.") 363 | ## NETWORK FAILURE MITIGATION: select network if network last block is > 20min 364 | 365 | ################################################################################################################################################ 366 | 367 | ConfigBypassURL = "https://raw.githubusercontent.com/exorde-labs/TestnetProtocol/main/targets/CodeModules.txt" 368 | 369 | ################## BLOCKCHAIN INTERFACING 370 | to = 60 371 | if mainnet_selected: 372 | contracts = requests.get("https://raw.githubusercontent.com/exorde-labs/TestnetProtocol/main/ContractsAddresses.txt", timeout=to).json() 373 | else: 374 | contracts = requests.get("https://raw.githubusercontent.com/MathiasExorde/TestnetProtocol-staging/main/ContractsAddresses.txt", timeout=to).json() 375 | abis = dict() 376 | abis["ConfigRegistry"] = requests.get("https://raw.githubusercontent.com/MathiasExorde/TestnetProtocol-staging/main/ABIs/ConfigRegistry.sol/ConfigRegistry.json", timeout=to).json() 377 | 378 | print("Config Address = ",contracts["ConfigRegistry"]) 379 | contract = w3.eth.contract(contracts["ConfigRegistry"], abi=abis["ConfigRegistry"]["abi"]) 380 | 381 | config_reg_contract = contract 382 | override_code_dict = dict() 383 | # override_code_dict["_moduleHashContracts_cli"] = "https://bafybeibuxrjwffjeymrjlkd2r35r5rdlzxuavoeympqgr7xrxor6hp3bh4.ipfs.w3s.link/Transaction.py" # Transaction.py 384 | # override_code_dict["_moduleHashSpotting_cli"] = "https://bafybeifqnq76utn767m4qbwd4j2jg6k3ypwcr2do7gkk3b26ooxfmzgc5e.ipfs.w3s.link/Scraper.py" # Scraper.py 385 | # override_code_dict["_moduleHashSpotChecking_cli"] = 
"https://bafybeibbygfm276hjion7ocaoyp3wlfodszhlba6jy3b3fzd37zawkfbgi.ipfs.w3s.link/Validator.py" # Validator.py 386 | # override_code_dict["_moduleHashApp_cli"] = "https://bafybeicdgmxvetbi4yqjztzzroevcfvnwobk6zomsz5nh4lvb3dftyimxa.ipfs.w3s.link/App.py" # App.py 387 | 388 | ########### AUTO UPDATE PROCEDURE ################## 389 | SelfUpdateProcedure() 390 | #################################################### 391 | 392 | if general_printing_enabled: 393 | print("\n[INITIAL MODULE SETUP] Downloading code modules on decentralized storage...") 394 | 395 | ################## READING ONCHAIN CONFIG TO DOWNLOAD LATEST CODE 396 | module_hash_list = ["_moduleHashContracts_cli", "_moduleHashSpotting_cli", "_moduleHashSpotChecking_cli", 397 | "_moduleHashApp_cli"] 398 | 399 | ############################## 400 | bypass_enabled = True 401 | ############################## 402 | 403 | 404 | boot_sleep_delay = randint(5,1*60) # sleep randomly between 30s & 10 minutes 405 | print("[ Network Load Balancing ] Waiting ",boot_sleep_delay, " seconds - System status = Booting.") 406 | time.sleep(boot_sleep_delay) 407 | 408 | nb_modules_fetched_from_config = 0 409 | nb_module_to_fetch = len(module_hash_list) 410 | 411 | code_array = [] 412 | 413 | if bypass_enabled == False: 414 | for im, value in enumerate(module_hash_list): 415 | #print(value) 416 | success = False 417 | trials = 0 418 | if general_printing_enabled: 419 | print("\tCode Sub-Module ",(im+1)," / ", len(module_hash_list), end='') 420 | 421 | print(" .") 422 | while(trials < 3): 423 | print(".",end='') 424 | try: 425 | if value in override_code_dict: 426 | URL = override_code_dict[value] 427 | code = SafeURLDownload(URL).text 428 | else: 429 | URL = hashValue = contract.functions.get(value).call() 430 | code = SafeURLDownload(URL).text 431 | code_array.append(code) 432 | success = True 433 | nb_modules_fetched_from_config += 1 434 | break 435 | except: 436 | time.sleep(2*(trials + 1)) 437 | trials += 1 438 | 439 | # if success: 440 | # exec(code) 441 | 442 | if bypass_enabled or (nb_modules_fetched_from_config != nb_module_to_fetch): 443 | print("\n****************\n[BYPASS] Fetching from ExordeLabs github: ", ConfigBypassURL) 444 | bypassModules = requests.get(ConfigBypassURL).json() 445 | for im, ModuleURL in enumerate(bypassModules): 446 | #print(value) 447 | success = False 448 | trials = 0 449 | if general_printing_enabled: 450 | print("\t[Github Override] Code Sub-Module ",(im+1)) 451 | while(trials < 3): 452 | try: 453 | code = SafeURLDownload(bypassModules[ModuleURL]).text 454 | success = True 455 | break 456 | except: 457 | time.sleep(2*(trials + 1)) 458 | trials += 1 459 | 460 | if(success == True): 461 | exec(code) 462 | else: # run the modules from the config 463 | time.sleep(1) 464 | for code_ in code_array: 465 | exec(code_) 466 | time.sleep(1) 467 | 468 | 469 | 470 | ############# LAUNCH THE CORE MODULE 471 | desktop_app() 472 | 473 | with open("localConfig.json", "r") as f: 474 | localconfig = json.load(f) 475 | 476 | while True: 477 | # sleep to maintain alive 478 | time.sleep(5*60) 479 | SelfUpdateProcedure() 480 | ## check update 481 | try: 482 | if general_printing_enabled: 483 | print("[UPDATE SYSTEM] Checking new updates...") 484 | try: 485 | _version = config_reg_contract.functions.get("version").call() 486 | _lastInfo = config_reg_contract.functions.get("lastInfo").call() 487 | except: 488 | _version = localconfig["ExordeApp"]["lastUpdate"] 489 | 490 | if("lastUpdate" not in localconfig["ExordeApp"]): 491 | 
localconfig["ExordeApp"]["lastUpdate"] = _version 492 | with open("localConfig.json", "w") as f: 493 | json.dump(localconfig, f) 494 | try: 495 | print("[UPDATE SYSTEM] Last Version: ", localconfig["ExordeApp"]["lastUpdate"], "New:", _version) 496 | except: 497 | print("[UPDATE SYSTEM] No Last Version: ", "New:", _version) 498 | 499 | if localconfig["ExordeApp"]["lastUpdate"] != _version: 500 | print("\n\n\n***************************\n",\ 501 | "Version {}".format(_version)," has been released.\nPlease restart your module to continue.\nAuto quit, please relaunch the program. \n") 502 | print("Last message from Exorde Labs => ",_lastInfo,"\n***************************.") 503 | # update localconfig, important 504 | localconfig["ExordeApp"]["lastUpdate"] = _version 505 | with open("localConfig.json", "w") as f: 506 | json.dump(localconfig, f) 507 | exit(1) 508 | except Exception as e: 509 | print(e) 510 | -------------------------------------------------------------------------------- /App.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Tue Oct 28 14:20:53 2022 4 | 5 | @author: florent, mathias 6 | Exorde Labs 7 | """ 8 | 9 | 10 | class Widget(): 11 | 12 | def __init__(self): 13 | 14 | try: 15 | locInfo = requests.get("http://ipinfo.io/json").json() 16 | self.userCountry = Web3.toHex(text=json.dumps(locInfo)) #["country"] 17 | except: 18 | self.userCountry = Web3.toHex(text="Unknown") 19 | 20 | netConfig = requests.get("https://raw.githubusercontent.com/MathiasExorde/TestnetProtocol-staging/main/NetworkConfig.txt").json() 21 | self.w3 = Web3(Web3.HTTPProvider(netConfig["_urlSkale"])) 22 | self.w3Tx = Web3(Web3.HTTPProvider(netConfig["_urlTxSkale"])) 23 | 24 | 25 | if general_printing_enabled: 26 | print("\n[Init] UPDATING CONFIG") 27 | self.readLocalConfig() 28 | 29 | 30 | x = threading.Thread(target=self.submarineManagement) 31 | x.daemon = True 32 | x.start() 33 | 34 | contracts = requests.get("https://raw.githubusercontent.com/MathiasExorde/TestnetProtocol-staging/main/ContractsAddresses.txt", timeout=to).json() 35 | abis = dict() 36 | abis["ConfigRegistry"] = requests.get("https://raw.githubusercontent.com/MathiasExorde/TestnetProtocol-staging/main/ABIs/ConfigRegistry.sol/ConfigRegistry.json", timeout=to).json() 37 | 38 | config_reg_contract = self.w3.eth.contract(contracts["ConfigRegistry"], abi=abis["ConfigRegistry"]["abi"]) 39 | 40 | # Check Updates 41 | with open("localConfig.json", "r") as f: 42 | localconfig = json.load(f) 43 | 44 | ## check update 45 | try: 46 | if general_printing_enabled: 47 | print("[Init Version Check] Checking version on start") 48 | try: 49 | _version = config_reg_contract.functions.get("version").call() 50 | _lastInfo = config_reg_contract.functions.get("lastInfo").call() 51 | print("Latest message from Exorde Labs: ",_lastInfo) 52 | except: 53 | _version = localconfig["ExordeApp"]["lastUpdate"] 54 | 55 | if("lastUpdate" not in localconfig["ExordeApp"]): 56 | localconfig["ExordeApp"]["lastUpdate"] = _version 57 | with open("localConfig.json", "w") as f: 58 | json.dump(localconfig, f) 59 | 60 | if(localconfig["ExordeApp"]["lastUpdate"] != _version): 61 | print("[Init Version Check] Updated to Version: ",_version) 62 | localconfig["ExordeApp"]["lastUpdate"] = _version 63 | with open("localConfig.json", "w") as f: 64 | json.dump(localconfig, f) 65 | else: 66 | print("[Init Version Check] Current Module Version: ",_version) 67 | except Exception as e: 68 | print("[Init 
Version Check] Error: ",e)
 69 | 
 70 |         nb_trials_reading_config = 0
 71 |         nb_max_before_interrup = 4
 72 |         while True:
 73 | 
 74 |             time.sleep(5*60)
 75 |             ## Check RemoteKill
 76 |             try:
 77 |                 try:
 78 |                     _remote_kill = str(config_reg_contract.functions.get("remote_kill").call())
 79 |                 except:
 80 |                     nb_trials_reading_config += 1
 81 |                     time.sleep(2)
 82 | 
 83 |                 if(_remote_kill == "kill"):
 84 |                     print("Forced Interruption of your Exorde Module. Check Discord for any update")
 85 |                     exit(1)
 86 | 
 87 |                 if(nb_trials_reading_config >= nb_max_before_interrup):
 88 |                     print("Could not read ConfigRegistry ",nb_max_before_interrup," times in a row. The Network might be in trouble, check Discord for any update.")
 89 |                     exit(1)
 90 |             except Exception as e:
 91 |                 print("RemoteKill Error = ",e)
 92 | 
 93 | 
 94 | 
 95 | 
 96 |     def monitor(self):
 97 | 
 98 |         try:
 99 |             if(self.spotThread.is_alive() == False):
100 |                 del self.spotThread
101 |                 self.spotThread = threading.Thread(target=self.sm.manage_scraping)
102 | 
103 |                 self.spotThread.daemon = True
104 |                 self.spotThread.start()
105 |             if(self.checkThread.is_alive() == False):
106 |                 del self.checkThread
107 |                 self.checkThread = threading.Thread(target=self.val.manage_checking)
108 |                 self.checkThread.daemon = True
109 |                 self.checkThread.start()
110 |         except:
111 |             pass
112 | 
113 |     def submarineManagement(self):
114 | 
115 |         if general_printing_enabled:
116 |             print("[Init] CREATING UTILS")
117 |             #print("[{}]\t{}\t{}\t\t{}".format(dt.datetime.now(),"WIDGET", "submarineManagement", self.status))
118 |         self.createUtils()
119 | 
120 |         self.stakeChecking()
121 | 
122 |         if general_printing_enabled:
123 |             print("[Init] CREATING DATA COLLECTION SUBROUTINE")
124 |             #print("[{}]\t{}\t{}\t\t{}".format(dt.datetime.now(),"WIDGET", "submarineManagement", self.status))
125 |         self.sm = Scraper(self)
126 |         self.spotThread = threading.Thread(target=self.sm.manage_scraping)
127 |         self.spotThread.daemon = True
128 |         self.spotThread.start()
129 | 
130 |         if general_printing_enabled:
131 |             print("[Init] CREATING VALIDATION SUBROUTINE")
132 |             #print("[{}]\t{}\t{}\t\t{}".format(dt.datetime.now(),"WIDGET", "submarineManagement", self.status))
133 | 
134 |         self.val = Validator(self)
135 |         # self.checkThread = threading.Thread(target=self.val.manage_checking)
136 |         # self.checkThread.start()
137 | 
138 | 
139 |     def to_explorer(self):
140 | 
141 |         x = threading.Thread(target=webbrowser.open_new, args=("http://explorer.exorde.network/",))
142 |         x.daemon = True
143 |         x.start()
144 |         #webbrowser.open_new("http://explorer.exorde.network/")
145 | 
146 |     def openFormattingScreen(self):
147 | 
148 |         x = threading.Thread(target=webbrowser.open_new, args=("https://light-vast-diphda.explorer.mainnet.skalenodes.com/address/{}/transactions".format(self.localconfig["ExordeApp"]["ERCAddress"]),))
149 |         x.daemon = True
150 |         x.start()
151 |         #webbrowser.open_new("https://light-vast-diphda.explorer.mainnet.skalenodes.com/address/{}/transactions".format(self.localconfig["ExordeApp"]["ERCAddress"]))
152 | 
153 |     def Close(self):
154 |         if general_printing_enabled:
155 |             print("Closing...")
156 |         exit(1)
157 | 
158 | 
159 |     def select_random_faucet_pk(self):
160 |         Private_key_base_ = "deaddeaddeaddead5fb92d83ed54c0ea1eb74e72a84ef980d42953caaa6d"
161 |         ## faucet private keys are [Private_key_base_ + ("%0.4x" % i)] with i from 0 to 499; the last 2 bytes are the selector.
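        ## e.g. "%0.4x" % 0 gives "0000" and "%0.4x" % 499 gives "01f3", so all 500
        ## faucet keys share the same base and differ only in their final 2 bytes.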
162 | 163 | selected_faucet_index_ = random.randrange(0,499+1,1) # [select index between 0 & 499 (500 faucets)] 164 | 165 | hex_selector_bytes = "%0.4x" % selected_faucet_index_ 166 | faucet_private_key_ = Private_key_base_ + hex_selector_bytes 167 | return selected_faucet_index_, faucet_private_key_ 168 | 169 | 170 | def autofund(self): 171 | if general_printing_enabled: 172 | print("[Initial Auto Faucet] Top up sFuel & some EXDT to worker address...") 173 | 174 | ### DECODE PRIVATE KEY FOR STORAGE 175 | 176 | user_address = self.localconfig["ExordeApp"]["ERCAddress"] 177 | 178 | ##### 0 - CHECK VALIDITY 179 | is_user_address_valid = self.w3.isAddress(user_address) 180 | if is_user_address_valid == False: 181 | print("[Init] INVALID USER ADDRESS, ABORT") 182 | os._exit(1) 183 | user_address = self.w3.toChecksumAddress(user_address) 184 | 185 | 186 | chainId_ = 2139927552 187 | EXDT_token_address = "0xcBc357F3077989B4636E93a8Ce193E05cd8cc56E" 188 | 189 | faucet_success = False 190 | 191 | max_nb_autofaucet_trials = 10 192 | nb_autofaucet_trials = 0 193 | while faucet_success == False: 194 | try: 195 | # select random faucet out of the 500 ones 196 | (fi, Private_key) = self.select_random_faucet_pk() 197 | if general_printing_enabled: 198 | print("[Faucet] selecting Auto-Faucet n°",fi) 199 | faucet_address = self.w3.eth.account.from_key(Private_key).address 200 | 201 | 202 | ### 1 - SEND FUEL FIRST 203 | #print("SEND FUEL") 204 | signed_txn = self.w3.eth.account.sign_transaction(dict( 205 | nonce=self.w3.eth.get_transaction_count(faucet_address), 206 | gasPrice=self.w3.eth.gas_price, 207 | gas=1000000, 208 | to=user_address, 209 | value=500000000000000, 210 | data=b'Hi Exorde!', 211 | #type=2, (optional) the type is now implicitly set based on appropriate transaction params 212 | chainId=chainId_, 213 | ), 214 | Private_key, 215 | ) 216 | 217 | previous_nounce = self.w3.eth.get_transaction_count(faucet_address) 218 | 219 | # SEND RAW TRANSACTION VIA THE TX ENDPOINT 220 | tx_hash = self.w3Tx.eth.send_raw_transaction(signed_txn.rawTransaction) 221 | 222 | time.sleep(2) 223 | for i in range (10): 224 | time.sleep(i*1.5+1) 225 | # WAIT FOR NEW NOUNCE BY READING PROXY 226 | current_nounce = self.w3.eth.get_transaction_count(faucet_address) 227 | if(current_nounce > previous_nounce): 228 | # found a new transaction because account nounce has increased 229 | break 230 | 231 | # WAIT FOR TX RECEIPT 232 | tx_receipt = self.w3.eth.wait_for_transaction_receipt(tx_hash, timeout=10, poll_latency = 3) 233 | 234 | 235 | print("[Faucet] sfuel funding tx = ",tx_receipt.transactionHash.hex()) 236 | 237 | ### 1 - SEND EXDT TOKENS 238 | token_abi = requests.get("https://raw.githubusercontent.com/MathiasExorde/TestnetProtocol-staging/main/ABIs/daostack/controller/daostack/controller/DAOToken.sol/DAOToken.json").json()["abi"] 239 | 240 | tok_contract = self.w3.eth.contract(EXDT_token_address, abi=token_abi) 241 | 242 | token_amount_to_send = 200000000000000000000 # 200 tokens EXDT 243 | increment_tx = tok_contract.functions.transfer(user_address, token_amount_to_send).buildTransaction({ 244 | 'from': faucet_address, 245 | 'nonce': self.w3.eth.get_transaction_count(faucet_address), 246 | 'value': 0, 247 | 'gas': 1000000, 248 | 'gasPrice': self.w3.eth.gas_price, 249 | }) 250 | 251 | tx_create = self.w3.eth.account.sign_transaction(increment_tx, Private_key) 252 | previous_nounce = self.w3.eth.get_transaction_count(faucet_address) 253 | 254 | # SEND RAW TRANSACTION VIA THE TX ENDPOINT 255 | tx_hash = 
self.w3Tx.eth.send_raw_transaction(signed_txn.rawTransaction) 256 | 257 | time.sleep(2) 258 | for i in range (10): 259 | time.sleep(i*1.5+1) 260 | # WAIT FOR NEW NOUNCE BY READING PROXY 261 | current_nounce = self.w3.eth.get_transaction_count(faucet_address) 262 | if(current_nounce > previous_nounce): 263 | # found a new transaction because account nounce has increased 264 | break 265 | 266 | # WAIT FOR TX RECEIPT 267 | tx_receipt = self.w3.eth.wait_for_transaction_receipt(tx_hash, timeout=10, poll_latency = 3) 268 | 269 | 270 | print("[Faucet] token funding tx = ",tx_receipt.transactionHash.hex()) 271 | 272 | time.sleep(1) 273 | _trials = 5 274 | read_status = False 275 | for i in range(_trials): 276 | try: 277 | token_balance = tok_contract.functions.balanceOf(user_address).call() 278 | user_token_balance = self.w3.fromWei(token_balance, 'ether') 279 | user_sfuel_balance = self.w3.eth.get_balance(user_address) 280 | read_status = True 281 | break 282 | except Exception as e: 283 | time.sleep((1+int(i))) 284 | 285 | if read_status == False: 286 | continue 287 | 288 | print('[Faucet] Worker EXDT Balance:', user_token_balance, " ") 289 | print('[Faucet] Worker sFuel Balance:', user_sfuel_balance, " sFUEL") 290 | 291 | if user_token_balance > 0 and user_sfuel_balance > 0: 292 | faucet_success = True 293 | print("[Faucet] Auto-Faucet n°",fi, " Success.") 294 | break 295 | nb_autofaucet_trials += 1 296 | if nb_autofaucet_trials >= max_nb_autofaucet_trials: 297 | print("[Faucet] Auto-Faucet Failure. Tried ",max_nb_autofaucet_trials," times. Giving up. Please report this error. Faucets might be empty.") 298 | exit(1) 299 | except Exception as e: 300 | print("[Faucet] Error: ",e) 301 | print("[Faucet] Auto-Faucet n°",fi, " Failure... retrying.") 302 | nb_autofaucet_trials += 1 303 | time.sleep(1+(nb_autofaucet_trials)*2) 304 | if nb_autofaucet_trials >= max_nb_autofaucet_trials: 305 | print("[Faucet] Auto-Faucet critical Failure. Tried ",max_nb_autofaucet_trials," times. Giving up. 
Please report this error.") 306 | exit(1) 307 | continue 308 | 309 | 310 | def createUtils(self): 311 | self.cm = ContractManager(self.localconfig["ExordeApp"]["ERCAddress"], self.pKey) 312 | self.tm = TransactionManager(self.cm) 313 | 314 | 315 | def generateLocalKey(self): 316 | random.seed(random.random()) 317 | baseSeed = ''.join(random.choices(string.ascii_uppercase + string.digits, k=256)) 318 | acct = Account.create(baseSeed) 319 | key = acct.key 320 | return acct.address, key 321 | 322 | def stakeChecking(self): 323 | 324 | #print("[{}]\t{}\t{}\t\t{}".format(dt.datetime.now(),"WIDGET", "stakeChecking", self.status)) 325 | 326 | self.am = self.cm.instantiateContract("AddressManager") 327 | 328 | if(self.localconfig["ExordeApp"]["MainERCAddress"] != ""): 329 | 330 | increment_tx = self.am.functions.ClaimMaster(self.localconfig["ExordeApp"]["MainERCAddress"]).buildTransaction( 331 | { 332 | 'from': self.localconfig["ExordeApp"]["ERCAddress"], 333 | 'gasPrice': self.w3.eth.gas_price, 334 | 'nonce': self.w3.eth.get_transaction_count(self.localconfig["ExordeApp"]["ERCAddress"]), 335 | } 336 | ) 337 | self.tm.waitingRoom_VIP.put((increment_tx, self.localconfig["ExordeApp"]["ERCAddress"], self.pKey)) 338 | 339 | if(self.cm.readStake() == True): 340 | 341 | self.am = self.cm.instantiateContract("AddressManager") 342 | 343 | if(self.localconfig["ExordeApp"]["MainERCAddress"] != ""): 344 | 345 | increment_tx = self.am.functions.ClaimMaster(self.localconfig["ExordeApp"]["MainERCAddress"]).buildTransaction( 346 | { 347 | 'from': self.localconfig["ExordeApp"]["ERCAddress"], 348 | 'gasPrice': self.w3.eth.gas_price, 349 | 'nonce': self.w3.eth.get_transaction_count(self.localconfig["ExordeApp"]["ERCAddress"]), 350 | } 351 | ) 352 | self.tm.waitingRoom_VIP.put((increment_tx, self.localconfig["ExordeApp"]["ERCAddress"], self.pKey)) 353 | else: 354 | 355 | requests.post("https://api.faucet.exorde.network/fundAccount/"+self.localconfig["ExordeApp"]["ERCAddress"]) 356 | 357 | self.am = self.cm.instantiateContract("AddressManager") 358 | 359 | if(self.localconfig["ExordeApp"]["MainERCAddress"] != ""): 360 | 361 | increment_tx = self.am.functions.ClaimMaster(self.localconfig["ExordeApp"]["MainERCAddress"]).buildTransaction( 362 | { 363 | 'from': self.localconfig["ExordeApp"]["ERCAddress"], 364 | 'gasPrice': self.w3.eth.gas_price, 365 | 'nonce': self.w3.eth.get_transaction_count(self.localconfig["ExordeApp"]["ERCAddress"]), 366 | } 367 | ) 368 | self.tm.waitingRoom_VIP.put((increment_tx, self.localconfig["ExordeApp"]["ERCAddress"], self.pKey)) 369 | 370 | self.cm.StakeManagement(self.tm) 371 | if general_printing_enabled: 372 | print("[Init] Staking requirement OK") 373 | 374 | 375 | 376 | def readLocalConfig(self): 377 | self.configFile = "localConfig.json" 378 | 379 | if general_printing_enabled: 380 | print("[Init] READING CONFIG FILE") 381 | 382 | with open("localConfig.json", "r") as f: 383 | self.localconfig = json.load(f) 384 | 385 | print("[Init] Current Config : ",self.localconfig) 386 | new_conf = self.localconfig 387 | if new_conf is None: 388 | new_conf = dict() 389 | 390 | new_conf["ExordeApp"]["MainERCAddress"] = str(main_wallet_) 391 | 392 | if("Updated" not in self.localconfig["ExordeApp"] or self.localconfig["ExordeApp"]["Updated"] == 0): 393 | 394 | if general_printing_enabled: 395 | print("[Init] FIRST WORKER LAUNCH") 396 | 397 | new_conf["ExordeApp"]["ERCAddress"], self.pKey = self.generateLocalKey() 398 | new_conf["ExordeApp"]["Updated"] = 1 399 | 
new_conf["ExordeApp"]["SendCountryInfo"] = 1 400 | 401 | if general_printing_enabled: 402 | print("[Init] New Worker Local Address = ",new_conf["ExordeApp"]["ERCAddress"]) 403 | print("[Init] First funding of the worker wallet") 404 | self.autofund() 405 | 406 | with open('bob.txt', "wb") as file: 407 | file.write(self.pKey) 408 | 409 | self.localconfig = new_conf 410 | 411 | else: 412 | with open('bob.txt', "rb") as file: 413 | self.pKey = file.read() 414 | 415 | # updating localconfig with new MainERCAddress 416 | with open("localConfig.json", "w") as f: 417 | json.dump(new_conf,f) 418 | 419 | try: 420 | self.allowGeoCoordSending = self.localconfig["ExordeApp"]["SendCountryInfo"] 421 | except: 422 | self.allowGeoCoordSending = 1 423 | 424 | 425 | def updateLocalConfig(self): 426 | with open("localConfig.json", "w") as f: 427 | json.dump(self.localconfig,f) 428 | 429 | 430 | def changeAllowanceGeo(self): 431 | if(self.allowGeoCoordSending == 1): 432 | self.allowGeoCoordSending = 0 433 | else: 434 | self.allowGeoCoordSending = 1 435 | 436 | self.localconfig["ExordeApp"]["SendCountryInfo"] = self.allowGeoCoordSending 437 | self.updateLocalConfig() 438 | 439 | 440 | 441 | def on_closing(self, master): 442 | if general_printing_enabled: 443 | print("[ClaimMaster] Claiming...") 444 | 445 | new_val = main_wallet_ 446 | if(new_val == ""): 447 | print("No Main Ethereum Wallet", "Please indicate your main Ethereum wallet address.") 448 | else: 449 | master.localconfig["ExordeApp"]["MainERCAddress"] = new_val 450 | with open('bob.txt', "rb") as file: 451 | pKey = file.read() 452 | am = master.cm.instantiateContract("AddressManager") 453 | increment_tx = am.functions.ClaimMaster(master.localconfig["ExordeApp"]["MainERCAddress"]).buildTransaction( 454 | { 455 | 'from': master.localconfig["ExordeApp"]["ERCAddress"], 456 | 'gasPrice': master.w3.eth.gas_price, 457 | 'nonce': master.w3.eth.get_transaction_count(master.localconfig["ExordeApp"]["ERCAddress"]), 458 | } 459 | ) 460 | master.tm.waitingRoom_VIP.put((increment_tx, master.localconfig["ExordeApp"]["ERCAddress"], master.pKey, True)) 461 | 462 | master.updateLocalConfig() 463 | self.user_info.destroy() 464 | 465 | 466 | def start_scraping(self, event = None): 467 | try: 468 | if(len(self.master.sm.keywords) < 10): 469 | target = self.keyoneEntry.get() 470 | 471 | if(target != "" and target.lower() not in self.master.sm.keywords and target != None): 472 | self.master.sm.keywords.append(target.lower()) 473 | 474 | elif(target.lower() in self.master.sm.keywords): 475 | print("Processing info", 476 | "This target has already been taken care of..") 477 | else: 478 | print("Processing info", 479 | "Please indicate a topic to look for.") 480 | 481 | self.sConfig.destroy() 482 | if(len(self.master.sm.keywords) < 10): 483 | ScrapingPanel(self.master) 484 | else: 485 | print("Processing info", 486 | "{} scrapers are already running on this machine.\nPlease stop one of them before starting another.".format(len(self.master.sm.keywords))) 487 | except Exception as e: 488 | pass 489 | 490 | 491 | def desktop_app(): 492 | try: 493 | wdg = Widget() 494 | except Exception as e: 495 | print("Init error",e) 496 | 497 | -------------------------------------------------------------------------------- /Validator.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Tue Oct 20 14:20:53 2022 4 | 5 | @author: florent, mathias 6 | Exorde Labs 7 | """ 8 | 9 | 10 | 11 | w3 = 
Web3(Web3.HTTPProvider(netConfig["_urlSkale"])) 12 | 13 | 14 | def get_blacklist(hashfile: str): 15 | blacklist = [x.replace('"','').strip() for x in requests.get("https://ipfs.io/ipfs/"+hashfile, allow_redirects=True).text.replace("\r","").replace("\n","")[19:-2].split(",")] 16 | return blacklist 17 | 18 | 19 | 20 | class Validator(): 21 | 22 | def __init__(self, app): 23 | 24 | self.app = app 25 | 26 | self._blacklist = get_blacklist("QmT4PyxSJX2yqYpjypyP75PR7FacBQDyES4Mdvg8m5Hrxj") 27 | self._contract = self.app.cm.instantiateContract("DataSpotting") 28 | 29 | self._rewardsInfoLastTimestamp = 0 30 | 31 | self._isApproved = False 32 | self._isRegistered = False 33 | self._isRunning = False 34 | self._lastProcessedBatchId = 0 35 | self._results = {"Advertising":0, 36 | "Blacklist":0, 37 | "Censoring":0, 38 | "Duplicates":0, 39 | "Empty":0, 40 | "Spam":0, 41 | "Validated":0 42 | } 43 | self._languages = dict() 44 | self.nbItems = 0 45 | self.current_batch = 0 46 | self.current_item = 0 47 | self.batchLength = 0 48 | self.gateWays = requests.get("https://raw.githubusercontent.com/exorde-labs/TestnetProtocol/main/targets/ipfs_gateways.txt").text.split("\n") 49 | 50 | now_ts = time.time() 51 | delay_between_rewardsInfo = 10*60 #10 min 52 | try: 53 | if general_printing_enabled: 54 | if (now_ts - self._rewardsInfoLastTimestamp) > delay_between_rewardsInfo or self._rewardsInfoLastTimestamp == 0: 55 | main_addr = self.app.localconfig["ExordeApp"]["MainERCAddress"] 56 | exdt_rewards = round(self.app.cm.instantiateContract("RewardsManager").functions.RewardsBalanceOf(main_addr).call()/(10**18),2) 57 | rep_amount = round(self.app.cm.instantiateContract("Reputation").functions.balanceOf(main_addr).call()/(10**18),2) 58 | print("[CURRENT REWARDS & REP] Main Address {}, REP = {} and EXDT Rewards = {} ".format(str(main_addr), rep_amount, exdt_rewards)) 59 | self._rewardsInfoLastTimestamp = now_ts 60 | except: 61 | time.sleep(2) 62 | pass 63 | 64 | if validation_printing_enabled: 65 | print("[Validation] sub-routine instantiated") 66 | self.totalNbBatch = 0 67 | 68 | # tokenizer = AutoTokenizer.from_pretrained("alonecoder1337/bert-explicit-content-classification") 69 | # model = AutoModelForSequenceClassification.from_pretrained("alonecoder1337/bert-explicit-content-classification") 70 | # self._explicitPipeline = transformers.pipeline("text-classification",model=model,tokenizer=tokenizer, return_all_scores=True) 71 | 72 | try: 73 | self.spammerList = self.downloadFile(self.app.cm.instantiateContract("ConfigRegistry").functions.get("spammerList").call())["spammers"] 74 | except: 75 | self.spammerList = self.downloadFile("QmStbdSQ8KBM72uAoqjcQEhJanhq2J8J2Q3ReijwxYFzme")["spammers"] 76 | 77 | try: 78 | print("\t[{}]\t{}\t{}\t\t{}".format(dt.datetime.now(),"REGISTERING", "-", "Registering worker online for work.")) 79 | self.register() 80 | except Exception as e: 81 | #print(e) 82 | self._isRegistered = False 83 | #print("[{}]\t{}\t{}\t\t{}".format(dt.datetime.now(),"CHECKER", "__init__", "COMPLETE")) 84 | self.checkThread = threading.Thread(target=self.manage_checking) 85 | self.checkThread.daemon = True 86 | self.checkThread.start() 87 | 88 | def threadHunter(self, thread): 89 | 90 | time.sleep(10) 91 | if(self.status == "DOWNLOADING"): 92 | del thread 93 | try: 94 | self.send_votes(self.current_batch, [], "DLERROR", 0, 0) 95 | except: 96 | pass 97 | else: 98 | pass 99 | 100 | def manage_checking(self): 101 | i = 0 102 | while True: 103 | if validation_printing_enabled: 104 | 
print("[Validation] Lauching the check content routine") 105 | exec("x{} = threading.Thread(target=self.check_content)".format(i)) 106 | exec("x{}.daemon = True".format(i)) 107 | exec("x{}.start()".format(i)) 108 | time.sleep(60*3.5) 109 | i += 1 110 | if i >= 250000: 111 | i = 0 112 | 113 | 114 | 115 | def register(self): 116 | 117 | if validation_printing_enabled: 118 | print("[Validation] DataSpotting contract instanciated") 119 | 120 | self._isApproved = self.app.cm.StakeManagement(self.app.tm) or self._contract.functions.isWorkerRegistered(self.app.localconfig["ExordeApp"]["ERCAddress"]).call() 121 | self._isRegistered = self._contract.functions.isWorkerRegistered(self.app.localconfig["ExordeApp"]["ERCAddress"]).call() 122 | 123 | 124 | if(self._contract.functions.isWorkerRegistered(self.app.localconfig["ExordeApp"]["ERCAddress"]).call() == False): 125 | 126 | if(self._isApproved == False and self._isRegistered == False): 127 | 128 | trials = 0 129 | 130 | while(self._isApproved == False or trials < 5): 131 | self._isApproved = self.app.cm.readStake() 132 | if(self._isApproved == True): 133 | 134 | increment_tx = self._contract.functions.RegisterWorker().buildTransaction( 135 | { 136 | 'from': self.app.localconfig["ExordeApp"]["ERCAddress"], 137 | 'gasPrice': w3.eth.gas_price, 138 | 'nonce': w3.eth.get_transaction_count(self.app.localconfig["ExordeApp"]["ERCAddress"]), 139 | } 140 | ) 141 | 142 | self.app.tm.waitingRoom_VIP.put((increment_tx, self.app.localconfig["ExordeApp"]["ERCAddress"], self.app.pKey)) 143 | 144 | time.sleep(30) 145 | 146 | _isRegisteredTrials = 0 147 | while(_isRegisteredTrials < 5): 148 | time.sleep(0.5) 149 | if(self._contract.functions.isWorkerRegistered(self.app.localconfig["ExordeApp"]["ERCAddress"]).call() == True): 150 | self._isRegistered = True 151 | break 152 | else: 153 | _isRegisteredTrials += 1 154 | time.sleep(30) 155 | if(_isRegisteredTrials == 5 and self._isRegistered == False): 156 | print("Initialization error", 157 | "Something went wrong while registering your worker address on the Validation Worksystem.\nPlease try restarting your application.") 158 | 159 | else: 160 | 161 | self.app.cm.StakeManagement(self.app.tm) 162 | trials += 1 163 | time.sleep(30) 164 | 165 | if(trials >= 5 and self._isRegistered == False): 166 | print("Initialization error", 167 | "Something went wrong while registering1 your worker address on the Validation Worksystem.\nPlease try restarting your application.") 168 | os._exit(0) 169 | 170 | elif(self._isApproved == True and self._isRegistered == False): 171 | 172 | increment_tx = self._contract.functions.RegisterWorker().buildTransaction( 173 | { 174 | 'from': self.app.localconfig["ExordeApp"]["ERCAddress"], 175 | 'gasPrice': w3.eth.gas_price, 176 | 'nonce': w3.eth.get_transaction_count(self.app.localconfig["ExordeApp"]["ERCAddress"]), 177 | } 178 | ) 179 | 180 | self.app.tm.waitingRoom_VIP.put((increment_tx, self.app.localconfig["ExordeApp"]["ERCAddress"], self.app.pKey)) 181 | 182 | if(self._contract.functions.isWorkerRegistered(self.app.localconfig["ExordeApp"]["ERCAddress"]).call() == False): 183 | increment_tx = self._contract.functions.RegisterWorker().buildTransaction( 184 | { 185 | 'from': self.app.localconfig["ExordeApp"]["ERCAddress"], 186 | 'gasPrice': w3.eth.gas_price, 187 | 'nonce': w3.eth.get_transaction_count(self.app.localconfig["ExordeApp"]["ERCAddress"]), 188 | } 189 | ) 190 | 191 | self.app.tm.waitingRoom_VIP.put((increment_tx, self.app.localconfig["ExordeApp"]["ERCAddress"], self.app.pKey)) 192 | 
193 | time.sleep(30) 194 | 195 | _isRegisteredTrials = 0 196 | while(_isRegisteredTrials < 5): 197 | if(self._contract.functions.isWorkerRegistered(self.app.localconfig["ExordeApp"]["ERCAddress"]).call() == True): 198 | self._isRegistered = True 199 | time.sleep(0.5) 200 | break 201 | else: 202 | _isRegisteredTrials += 1 203 | time.sleep(30) 204 | if(_isRegisteredTrials == 5 and self._isRegistered == False): 205 | print("Initialization error", 206 | "Something went wrong while registering your worker address on the Validation Worksystem.\nPlease try restarting your application.") 207 | os._exit(0) 208 | 209 | elif(self._isRegistered == True): 210 | return 211 | 212 | 213 | def downloadFile(self, hashname: str): 214 | headers = { 215 | 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.146 Safari/537.36', 216 | 'Connection':'close' 217 | } 218 | 219 | trials = 0 220 | for gateway in ["https://ipfs.filebase.io/ipfs/", 221 | "https://ipfs.eth.aragon.network/ipfs/", 222 | "https://api.ipfsbrowser.com/ipfs/get.php?hash="]: 223 | url = gateway + hashname 224 | trials = 0 225 | while trials < 5: 226 | try: 227 | r = requests.get(url, headers=headers, allow_redirects=True, stream=True, timeout=3) 228 | if(r.status_code == 200): 229 | try: 230 | return r.json() 231 | except: 232 | pass 233 | else: 234 | #print(r.__dict__) 235 | trials += 1 236 | except Exception as e: 237 | trials += 1 238 | time.sleep(1+trials) 239 | if(trials >= 5): 240 | break 241 | if(trials == 5): 242 | break 243 | #print("Couldn't download file", hashname) 244 | return None 245 | 246 | 247 | def get_content(self): 248 | 249 | self.status = "DOWNLOADING" 250 | 251 | headers = { 252 | 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.146 Safari/537.36' 253 | } 254 | 255 | status = "" 256 | max_trials_ = 2 257 | timeout_ = 3 258 | 259 | if detailed_validation_printing_enabled: 260 | print("[Validation] Checking if worker is registered already") 261 | 262 | str_my_address = self.app.localconfig["ExordeApp"]["ERCAddress"] 263 | 264 | 265 | for trial in range(max_trials_): 266 | try: 267 | if(self._contract.functions.isWorkerRegistered(str_my_address).call() == False): 268 | 269 | if validation_printing_enabled: 270 | print("[Validation] Worker {} not registered".format(str_my_address)) 271 | self.register() 272 | print("\t[{}]\t{}\t{}\t\t{}".format(dt.datetime.now(),"REGISTERING", "-", "Registering worker online for work.")) 273 | else: 274 | if validation_printing_enabled: 275 | print("[Validation] Worker {} already registered".format(str_my_address)) 276 | break 277 | except: 278 | time.sleep(3) 279 | pass 280 | 281 | 282 | try: 283 | _isNewWorkAvailable = self._contract.functions.IsNewWorkAvailable(self.app.localconfig["ExordeApp"]["ERCAddress"]).call() 284 | except: 285 | _isNewWorkAvailable = False 286 | 287 | 288 | if(_isNewWorkAvailable == False): 289 | if validation_printing_enabled: 290 | print("[Validation] No new work, standby.") 291 | return None, [] 292 | else: 293 | if validation_printing_enabled: 294 | print("[Validation] New Work Available Detected.") 295 | print("[Validation] Fetching Work Batch ID") 296 | try: 297 | for trial in range(max_trials_): 298 | try: 299 | gateways = requests.get("https://raw.githubusercontent.com/exorde-labs/TestnetProtocol/main/targets/ipfs_gateways.txt").text.split("\n")[:-1] 300 | except: 301 | time.sleep(3) 302 | pass 303 | nb_gateways 
= len(gateways) 304 | 305 | try: 306 | batchId = int(self._contract.functions.GetCurrentWork(self.app.localconfig["ExordeApp"]["ERCAddress"]).call()) 307 | except: 308 | batchId = 0 309 | if(batchId > self._lastProcessedBatchId and batchId > self.current_batch): 310 | 311 | self.current_batch = batchId #moved up 312 | 313 | dataBlocks = list() 314 | try: 315 | fileList = self._contract.functions.getIPFShashesForBatch(batchId).call() 316 | except: 317 | fileList = [] 318 | 319 | if validation_printing_enabled: 320 | print("\t[{}]\t{}\t{}\t\t{}".format(dt.datetime.now(),"DATA BATCH VALIDATION", "Batch ID = {}".format(batchId), "PROCESSING {} batch files.".format(len(fileList)))) 321 | 322 | for i in range(len(fileList)): 323 | file = fileList[i] 324 | 325 | if detailed_validation_printing_enabled: 326 | print("\t\tDownloading IPFS sub-file -> ",file," ... ", end='') 327 | isOk = False 328 | # try every gateway on each of max_trials_ passes, with growing pauses, before giving up on this file 329 | for trial in range(max_trials_): 330 | _used_timeout = timeout_*(1+trial) 331 | time.sleep(trial+0.1) 332 | #print("trial n°",trial,"/",(max_trials_-1)) 333 | ## initialize the gateway loop 334 | gateway_cursor = 0 335 | ### iterate a trial of the download over all gateways we have 336 | for gateway_ in gateways: 337 | _used_gateway = gateways[gateway_cursor] 338 | _used_gateway = random.choice(gateways) 339 | try: 340 | _endpoint_url = _used_gateway+file 341 | #content = urllib.request.urlopen(_endpoint_url, timeout=_used_timeout) 342 | time.sleep(1) 343 | try: 344 | content = requests.get(_endpoint_url, headers=headers, allow_redirects=True, stream=True, timeout=3) 345 | if detailed_validation_printing_enabled: 346 | print(" downloaded.") 347 | except Exception as e: 348 | # print(e) 349 | 350 | if detailed_validation_printing_enabled: 351 | print(",", end='') # a failed GET leaves content stale or unbound; the json step below then raises and the outer except moves on to the next gateway 352 | try: 353 | content = content.json() 354 | content = content["Content"] 355 | except Exception as e: 356 | content = None 357 | for item in content: 358 | try: 359 | dataBlocks.append(item) 360 | except Exception as e: 361 | if detailed_validation_printing_enabled: 362 | print("\tDataBlock error", e, item) 363 | pass 364 | if(len(content)>0): 365 | isOk = True 366 | time.sleep(1) 367 | break 368 | except Exception as e: 369 | gateway_cursor += 1 370 | if gateway_cursor>=nb_gateways: 371 | #print("\t----Tried all gateways") 372 | break 373 | ## Break from gateway loop if we got the file 374 | if isOk: 375 | break 376 | time.sleep(0.5) 377 | ## Break from trial loop if we got the file 378 | if isOk: 379 | break 380 | time.sleep(0.1) 381 | 382 | if detailed_validation_printing_enabled: 383 | print("\tData Batch files fetched successfully.") 384 | 385 | self._lastProcessedBatchId = batchId 386 | 387 | return batchId, dataBlocks 388 | 389 | 390 | except Exception as e: 391 | print(e) 392 | pass 393 | 394 | 395 | return None, [] 396 | 397 | 398 | def isSpamContent(self, text): 399 | 400 | if(text in self.spammerList): 401 | return True 402 | else: 403 | return False 404 | 405 | def isExplicitContent(self, text): 406 | return False 407 | 408 | def isAdvertisingContent(self, text, debug_=False): 409 | regex = r"(https?://[^\s]+)" 410 | if debug_: 411 | print("isAdvertisingContent debug ", regex) 412 | 413 | urls_found = re.findall(regex,text) 414 | if debug_: 415 | print("URLs found in content = ",urls_found) 416 | print("Number of URLs found in content = ",len(urls_found)) 417 | if(len(urls_found) >= 4): 418 | 419 | if debug_: 420 | print("isAdvertisingContent 
ADVERTISING DETECTED") 421 | return True 422 | else: 423 | return False 424 | 425 | def generateFileName(self): 426 | random.seed(random.random()) 427 | baseSeed = ''.join(random.choices(string.ascii_uppercase + string.digits, k=256)) 428 | fileName = baseSeed + '.txt' 429 | return fileName 430 | 431 | def filebase_download(self, bucketName, keyName): 432 | 433 | s3 = boto3.client( 434 | 's3', 435 | endpoint_url = 'https://s3.filebase.com', 436 | region_name='us-east-1', 437 | aws_access_key_id='24C83682E3758DA63DD9', 438 | aws_secret_access_key='B149EQGd1WwGLpuWHgPGT5wQ5OqgXPq3AOQtTeBr' 439 | ) 440 | keyName = "QmdjDzRZGZEVzNnnViRzPgMLSjrTC12CH4usqqGCc3UBMc" 441 | # bucketName = "exorde-spotdata-1" 442 | response = s3.get_object(Bucket = bucketName, Key=keyName) 443 | 444 | return response 445 | 446 | def filebase_upload(self, content: str, bucket_name: str): 447 | 448 | s3 = boto3.resource( 449 | 's3', 450 | endpoint_url = 'https://s3.filebase.com', 451 | region_name='us-east-1', 452 | aws_access_key_id='24C83682E3758DA63DD9', 453 | aws_secret_access_key='B149EQGd1WwGLpuWHgPGT5wQ5OqgXPq3AOQtTeBr' 454 | ) 455 | response = s3.Object(bucket_name, self.generateFileName()).put(Body=content) 456 | 457 | return response["ResponseMetadata"]["HTTPHeaders"]['x-amz-meta-cid'] 458 | 459 | def isCommitPeriodActive(self, batchId): 460 | 461 | _secondsToWait = 5 462 | _isPeriodActive = False 463 | 464 | for i in range(5): 465 | try: 466 | _isPeriodActive = self._contract.functions.commitPeriodActive(batchId).call() 467 | time.sleep(0.1) 468 | if(_isPeriodActive == True): 469 | break 470 | except: 471 | time.sleep(_secondsToWait*i) 472 | 473 | return _isPeriodActive 474 | 475 | def isRevealPeriodActive(self, batchId): 476 | 477 | _secondsToWait = 5 478 | _isPeriodActive = False 479 | 480 | for i in range(6): 481 | try: 482 | time.sleep(0.1) 483 | _isPeriodActive = self._contract.functions.revealPeriodActive(batchId).call() 484 | if(_isPeriodActive == True): 485 | break 486 | except: 487 | time.sleep(_secondsToWait*i) 488 | 489 | return _isPeriodActive 490 | 491 | def send_votes(self, batchId, results, status, batchResult, randomSeed): 492 | 493 | self.status = "VOTING" 494 | 495 | if validation_printing_enabled: 496 | print("[{}]\t{}\t{}\t\t{}".format(dt.datetime.now(),"VOTING", "send_votes", " BatchStatus({})".format(batchResult))) 497 | 498 | 499 | _isUploaded = False 500 | _uploadTrials = 0 501 | 502 | if validation_printing_enabled: 503 | print("[{}]\t{}\t{}\t\t{}".format(dt.datetime.now(),"UPLOADING FILE", "send_votes", " BatchStatus({})".format(batchResult))) 504 | res = "" 505 | 506 | while(_isUploaded == False or _uploadTrials < 5): 507 | time.sleep(1) 508 | if(res == ""): 509 | try: 510 | configRegistry_ = self.app.cm.instantiateContract("ConfigRegistry") 511 | 512 | trials_ = 0 513 | bucket_to_upload = "exorde-spotdata-1" 514 | while True: 515 | time.sleep(0.1) 516 | try: 517 | # print("bucket_to_upload try") 518 | bucket_to_upload = configRegistry_.functions.get("SpotcheckBucket").call() 519 | break 520 | except: 521 | # print("fail spotcheck bucket recup, retry") 522 | trials_ += 1 523 | time.sleep(2) 524 | if trials_ > 5: 525 | break 526 | 527 | trials_ = 0 528 | while True: 529 | time.sleep(0.1) 530 | try: 531 | if validation_printing_enabled: 532 | print("[{}]\t{}\t{}\t\t{}".format(dt.datetime.now(),"FILE UPLOAD ATTEMPT ", "send_votes", " Bucket({})".format(bucket_to_upload))) 533 | res = self.filebase_upload(json.dumps({"Content":results}, indent=4, sort_keys=True, default=str), 
bucket_to_upload ) 534 | break 535 | except: 536 | if validation_printing_enabled: 537 | print("[{}]\t{}\t{}\t\t{}".format(dt.datetime.now(),"FILE UPLOAD RETRY ", "send_votes", " Bucket({})".format(bucket_to_upload))) 538 | trials_ += 1 539 | time.sleep(2) 540 | if trials_ > 5: 541 | break 542 | 543 | _isUploaded = True 544 | 545 | if validation_printing_enabled: 546 | print("[{}]\t{}\t{}\t\t{}".format(dt.datetime.now(),"FILE UPLOADED ", "send_votes", " Bucket({})".format(bucket_to_upload))) 547 | break 548 | except: 549 | if validation_printing_enabled: 550 | print("[{}]\t{}\t{}\t\t{}".format(dt.datetime.now(),"FILE UPLOAD FAILED ", "send_votes", " Bucket({})".format(bucket_to_upload))) 551 | _uploadTrials += 1 552 | time.sleep(5*(_uploadTrials+1)) 553 | if(_uploadTrials >= 5): 554 | break 555 | else: 556 | break 557 | if(_uploadTrials >= 5): 558 | break 559 | 560 | if validation_printing_enabled: 561 | print("[{}]\t{}\t{}\t\t{}".format(dt.datetime.now(),"UPLOADING DONE", "send_votes", " BatchStatus({})".format(batchResult))) 562 | 563 | try: 564 | if(res != "" or status != "Success"): 565 | if(self._contract.functions.isWorkerAllocatedToBatch(batchId, self.app.localconfig["ExordeApp"]["ERCAddress"])): #here 566 | 567 | try: 568 | _didCommit = self._contract.functions.didCommit(self.app.localconfig["ExordeApp"]["ERCAddress"], batchId).call() 569 | except: 570 | _didCommit = False 571 | 572 | if detailed_validation_printing_enabled: 573 | print("\t[Validation - L2] didCommit = ",_didCommit) 574 | 575 | if(_didCommit == False): 576 | 577 | if detailed_validation_printing_enabled: 578 | print("\t[Validation - L2] didCommit False loop => ") 579 | 580 | try: 581 | _commitPeriodOver = self._contract.functions.commitPeriodOver(batchId).call() 582 | except: 583 | _commitPeriodOver = False 584 | 585 | if detailed_validation_printing_enabled: 586 | print("\t[Validation - L2] _commitPeriodOver = ",_commitPeriodOver) 587 | if(_commitPeriodOver == False): 588 | drop = False 589 | try: 590 | 591 | while True: 592 | time.sleep(1) 593 | try: 594 | 595 | try: 596 | _commitPeriodActive = self._contract.functions.commitPeriodActive(batchId).call() 597 | except: 598 | _commitPeriodActive = False 599 | 600 | if detailed_validation_printing_enabled: 601 | print("\t[Validation - L2] _commitPeriodActive({}) = ".format(batchId),_commitPeriodActive) 602 | 603 | if(_commitPeriodActive == True): 604 | if detailed_validation_printing_enabled: 605 | print("\t[Validation - L2] _commitPeriodActive is true") 606 | drop = False 607 | break 608 | else: 609 | time.sleep(5) 610 | if detailed_validation_printing_enabled: 611 | print("\t[Validation - L2] _commitPeriodActive false so wait 5s") 612 | 613 | try: 614 | _commitPeriodOver = self._contract.functions.commitPeriodOver(batchId).call() 615 | except: 616 | _commitPeriodOver = False 617 | 618 | if detailed_validation_printing_enabled: 619 | print("\t[Validation - L2] _commitPeriodOver false so wait 5s") 620 | if(_commitPeriodOver == True): 621 | drop = True 622 | break 623 | except: 624 | time.sleep(30) 625 | 626 | except Exception as e: 627 | pass 628 | 629 | if(drop == False): 630 | hasCommitted = False 631 | while(hasCommitted == False): 632 | if(hasCommitted == False): 633 | try: 634 | time.sleep(0.5) 635 | increment_tx = self._contract.functions.commitSpotCheck(batchId, self._contract.functions.getEncryptedStringHash(res, randomSeed).call(), self._contract.functions.getEncryptedHash(batchResult, randomSeed).call(), len(results), status).buildTransaction( 636 | { 637 | 
'from': self.app.localconfig["ExordeApp"]["ERCAddress"], 638 | 'gasPrice': w3.eth.gas_price, 639 | 'nonce': w3.eth.get_transaction_count(self.app.localconfig["ExordeApp"]["ERCAddress"]), 640 | } 641 | ) 642 | self.app.tm.waitingRoom_VIP.put((increment_tx, self.app.localconfig["ExordeApp"]["ERCAddress"], self.app.pKey)) 643 | hasCommitted = True 644 | 645 | if validation_printing_enabled: 646 | print("\t[{}]\t{}\t{}\t\t{}".format(dt.datetime.now(),"VALIDATION", "send_votes", "SUBMISSION & VOTE ENCRYPTED - Commited({})".format(batchId))) 647 | 648 | 649 | break 650 | except Exception as e: 651 | time.sleep(30) 652 | else: 653 | break 654 | 655 | while True: 656 | time.sleep(1) 657 | try: 658 | 659 | try: 660 | _revealPeriodActive = self._contract.functions.revealPeriodActive(batchId).call() 661 | except: 662 | _revealPeriodActive = False 663 | 664 | if detailed_validation_printing_enabled: 665 | print("\t[Validation - L2] _revealPeriodActive = ",_revealPeriodActive) 666 | if(_revealPeriodActive == True): 667 | break 668 | else: 669 | time.sleep(10) 670 | except: 671 | time.sleep(30) 672 | 673 | 674 | while True: 675 | time.sleep(1) 676 | try: 677 | _revealPeriodOver = self._contract.functions.revealPeriodOver(batchId).call() 678 | except: 679 | _revealPeriodOver = False 680 | 681 | if detailed_validation_printing_enabled: 682 | print("\t[Validation - L2] _revealPeriodOver = ",_revealPeriodOver) 683 | 684 | if(_revealPeriodOver == False): 685 | if detailed_validation_printing_enabled: 686 | print("\t[Validation - L2] _revealPeriodOver FALSE loop ") 687 | try: 688 | 689 | try: 690 | _didReveal = self._contract.functions.didReveal(self.app.localconfig["ExordeApp"]["ERCAddress"], batchId).call() 691 | except: 692 | _didReveal = False 693 | if detailed_validation_printing_enabled: 694 | print("\t[Validation - L2] didReveal ",_didReveal) 695 | 696 | if(_didReveal == False): 697 | 698 | try: 699 | _didCommit = self._contract.functions.didCommit(self.app.localconfig["ExordeApp"]["ERCAddress"], batchId).call() 700 | except: 701 | _didCommit = True 702 | 703 | if detailed_validation_printing_enabled: 704 | print("\t[Validation - L2] _revealPeriodOver _didCommit ",_didCommit) 705 | 706 | if(_didCommit == True): 707 | hasRevealed = False 708 | while(hasRevealed == False): 709 | time.sleep(0.5) 710 | try: 711 | increment_tx = self._contract.functions.revealSpotCheck(batchId, res, batchResult, randomSeed).buildTransaction( 712 | { 713 | 'from': self.app.localconfig["ExordeApp"]["ERCAddress"], 714 | 'gasPrice': w3.eth.gas_price, 715 | 'nonce': w3.eth.get_transaction_count(self.app.localconfig["ExordeApp"]["ERCAddress"]), 716 | } 717 | ) 718 | self.app.tm.waitingRoom_VIP.put((increment_tx, self.app.localconfig["ExordeApp"]["ERCAddress"], self.app.pKey)) 719 | hasRevealed = True 720 | 721 | if validation_printing_enabled: 722 | print("[{}]\t{}\t{}\t\t{}".format(dt.datetime.now(),"VALIDATION", "send_votes", "SUBMISSION & VOTE - Revealed ({})".format(batchId))) 723 | 724 | time.sleep(3) 725 | self._lastProcessedBatchId = batchId 726 | break 727 | except Exception as e: 728 | pass 729 | break 730 | except Exception as e: 731 | break 732 | else: 733 | break 734 | 735 | 736 | else: 737 | if detailed_validation_printing_enabled: 738 | print("\t[Validation - L2] waiting 5s") 739 | time.sleep(5) 740 | else: 741 | print("\t[Validation - L2] Worker not allocated the batch! 
[Error]") 742 | 743 | except Exception as e: 744 | print(e) 745 | pass 746 | 747 | def process_batch(self, batchId, documents): 748 | 749 | if(batchId != None): 750 | 751 | if validation_printing_enabled: 752 | print("[{}]\t{}\t{}\t\t{}".format(dt.datetime.now(),"VALIDATION", "process_batch", "PROCESSING DATA({})".format(batchId))) 753 | 754 | try: 755 | randomSeed = random.randint(0,999999999) 756 | results = dict() 757 | ram = list() 758 | 759 | if(len(documents) > 0): 760 | try: 761 | 762 | batchResult = 1 763 | for i in range(len(documents)): 764 | if i%50 == 0 and detailed_validation_printing_enabled: 765 | print("\t\t -> Web Content item ",int(i)," / ",len(documents)) 766 | 767 | try: 768 | self.current_item = i 769 | document = documents[i] 770 | except: 771 | document = None 772 | 773 | try: 774 | response = 1 775 | if(document != None): 776 | 777 | document["item"]["Content"] = document["item"]["Content"].replace('"','\"') 778 | 779 | 780 | try: 781 | debug_toggle = False 782 | if(self.isAdvertisingContent(str(document["item"]["Content"]), debug_=debug_toggle)): 783 | 784 | self._results["Advertising"] += 1 785 | response = 0 786 | 787 | 788 | if (document["item"]["Content"].strip() in ("[removed]", "[deleted]", "[citation needed]", "", "None")): 789 | self._results["Empty"] += 1 790 | response = 0 791 | 792 | if(self.isExplicitContent(document["item"]["Content"])): 793 | self._results["Censoring"] += 1 794 | response = 0 795 | 796 | if(document["item"]["Url"] in self._blacklist or document["item"]["DomainName"] in self._blacklist): 797 | self._results["Blacklist"] += 1 798 | response = 0 799 | 800 | if(self.isSpamContent(document["item"]["Author"])): 801 | self._results["Spam"] += 1 802 | response = 0 803 | 804 | if(document["item"]["Url"] in ram): 805 | self._results["Duplicates"] += 1 806 | response = 0 807 | 808 | if(response == 1): 809 | self._results["Validated"] += 1 810 | 811 | if(document["item"]["Language"] not in self._languages): 812 | self._languages[document["item"]["Language"]] = 1 813 | else: 814 | self._languages[document["item"]["Language"]] += 1 815 | 816 | results[document["item"]["Url"]] = document 817 | 818 | ram.append(document["item"]["Url"]) 819 | 820 | self.nbItems += 1 821 | except Exception as e: 822 | print("Exception during processing: ",e) 823 | 824 | response = 0 825 | self.nbItems += 1 826 | except Exception as e: 827 | 828 | print("Exception catched = ",e) 829 | self.nbItems += 1 830 | response = 0 831 | 832 | status = "Success" 833 | if validation_printing_enabled: 834 | print("[{}]\t{}\t{}\t\t{}".format(dt.datetime.now(),"VALIDATION", "Processing Status:", " [{}] ".format(status))) 835 | 836 | x = threading.Thread(target=self.send_votes, args=(batchId, results, status, batchResult, randomSeed,)) 837 | x.daemon = True 838 | x.start() 839 | except Exception as e: 840 | status = "Failure" 841 | if validation_printing_enabled: 842 | print("[{}]\t{}\t{}\t\t{}".format(dt.datetime.now(),"VALIDATION", "Processing Status:", " [{}] ".format(status))) 843 | batchResult = 0 844 | x = threading.Thread(target=self.send_votes, args=(batchId, results, status, batchResult, randomSeed,)) 845 | x.daemon = True 846 | x.start() 847 | 848 | elif(len(documents) == 0): 849 | status = "NoData" 850 | batchResult = 0 851 | x = threading.Thread(target=self.send_votes, args=(batchId, results, status, batchResult, randomSeed,)) 852 | x.daemon = True 853 | x.start() 854 | except Exception as e: 855 | #print(e) 856 | pass 857 | 858 | def check_content(self, doc:str = ""): 859 
| 860 | 861 | try: 862 | 863 | now_ts = time.time() 864 | delay_between_rewardsInfo = 10*60 #10 min 865 | try: 866 | if general_printing_enabled: 867 | if ( now_ts -self._rewardsInfoLastTimestamp ) > delay_between_rewardsInfo or self._rewardsInfoLastTimestamp == 0: 868 | main_addr = self.app.localconfig["ExordeApp"]["MainERCAddress"] 869 | exdt_rewards = round(self.app.cm.instantiateContract("RewardsManager").functions.RewardsBalanceOf(main_addr).call()/(10**18),2) 870 | rep_amount = round(self.app.cm.instantiateContract("Reputation").functions.balanceOf(main_addr).call()/(10**18),2) 871 | print("[CURRENT REWARDS & REP] Main Address {}, REP = {} and EXDT Rewards = {} ".format(str(main_addr), rep_amount, exdt_rewards)) 872 | self._rewardsInfoLastTimestamp = now_ts 873 | except: 874 | time.sleep(2) 875 | pass 876 | 877 | batchId, documents = self.get_content() 878 | 879 | if(batchId != None): 880 | 881 | if(batchId != None and batchId >= self.current_batch): 882 | if validation_printing_enabled: 883 | print("[{}]\t{}\t{}\t\t{}".format(dt.datetime.now(),"VALIDATION", "check_content", "PROCESSING({})".format(batchId))) 884 | try: 885 | self.totalNbBatch += 1 886 | self.batchLength = len(documents) 887 | 888 | self.process_batch(batchId, documents) 889 | self._lastProcessedBatchId = batchId 890 | self._isRunning = False 891 | except Exception as e: 892 | self._isRunning = False 893 | else: 894 | self.status = "OLDJOB" 895 | else: 896 | self.status = "NOJOB" 897 | 898 | except Exception as e: 899 | self._isRunning = False -------------------------------------------------------------------------------- /Scraper.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Tue Sep 20 14:20:53 2022 4 | 5 | @author: florent, mathias 6 | Exorde Labs 7 | """ 8 | from datetime import datetime as dt 9 | from datetime import timedelta 10 | from datetime import date 11 | import subprocess 12 | def install_upgrade(package): 13 | subprocess.check_call([sys.executable, "-m", "pip", "install", package, "--user", "--upgrade"]) 14 | 15 | #### INIT 16 | try: 17 | install_upgrade("snscrape") 18 | except: 19 | print("failed upgrading package...") 20 | 21 | import snscrape.modules 22 | 23 | CLEANR = re.compile('<.*?>|&([a-z0-9]+|#[0-9]{1,6}|#x[0-9a-f]{1,6});') 24 | 25 | def cleanhtml(raw_html): 26 | cleantext = re.sub(CLEANR, '', raw_html) 27 | return cleantext 28 | 29 | 30 | def preprocess(text): 31 | new_text = [ 32 | ] 33 | for t in text.split(" "): 34 | t = '@user' if t.startswith('@') and len(t) > 1 else t 35 | t = 'http' if t.startswith('http') else t 36 | new_text.append(t) 37 | return " ".join(new_text) 38 | 39 | def generateFileName(): 40 | random.seed(random.random()) 41 | baseSeed = ''.join(random.choices(string.ascii_uppercase + string.digits, k=256)) 42 | fileName = baseSeed + '.txt' 43 | return fileName 44 | 45 | def ipfs_pin_upload(content: str): 46 | ## upload file & pin to IPFS network 47 | newHeaders = {'Content-type': 'application/json', 'Accept': 'text/plain'} 48 | endpoint_url_ = 'http://ipfs-api.exorde.network/add' 49 | time.sleep(0.5) 50 | try: 51 | response = requests.post(endpoint_url_, data=content ,headers=newHeaders) 52 | except Exception as e: 53 | print("request pin upload error: ",e) 54 | 55 | response_ok = False 56 | json_response = None 57 | try: 58 | json_response = json.loads(response.text) 59 | response_ok = True 60 | except: 61 | response_ok = False 62 | 63 | print("[Spotting] json response = 
",json_response) 64 | if response_ok: 65 | cid_obtained = json_response["cid"] 66 | else: 67 | cid_obtained = None 68 | return cid_obtained 69 | 70 | 71 | def gen_chan(r): 72 | for idx, page in enumerate(r): 73 | for thread in r[idx]['threads']: 74 | yield thread 75 | 76 | def get_threads(threads, key: str, default='NaN'): 77 | return threads.get(key, default) 78 | 79 | def downloadFile(hashname: str, name: str): 80 | 81 | headers = { 82 | 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.146 Safari/537.36', 83 | } 84 | 85 | url = "https://ipfs.filebase.io/ipfs/" + hashname 86 | r = requests.get(url, headers=headers, allow_redirects=True, 87 | stream=True, timeout=1200) 88 | 89 | try: 90 | os.mkdir("ExordeWD") 91 | except: 92 | pass 93 | 94 | open('ExordeWD\\'+name, 'wb').write(r.content) 95 | 96 | def downloadFile2Data2(hashname: str): 97 | 98 | headers = { 99 | 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.146 Safari/537.36', 100 | } 101 | url = "https://w3s.link/ipfs/" + hashname 102 | 103 | trials = 0 104 | 105 | while trials < 5: 106 | try: 107 | r = requests.get(url, timeout=(0.5,3)) 108 | #print(r) 109 | if (r.status_code == 200): 110 | try: 111 | return r.json() 112 | except Exception as e: 113 | pass 114 | #print(e) 115 | else: 116 | pass 117 | except requests.exceptions.Timeout: 118 | trials += 1 119 | # Maybe set up for a retry, or continue in a retry loop 120 | except requests.exceptions.TooManyRedirects: 121 | trials += 1 122 | # Tell the user their URL was bad and try a different one 123 | except requests.exceptions.RequestException as e: 124 | trials += 1 125 | # catastrophic error. bail. 126 | except Exception as e: 127 | trials += 1 128 | pass 129 | if (trials >= 5): 130 | return None 131 | 132 | class Scraper(): 133 | 134 | def __init__(self, app): 135 | 136 | r = requests.get("https://raw.githubusercontent.com/lukes/ISO-3166-Countries-with-Regional-Codes/master/all/all.csv").text.split("\n") 137 | names = r[0] 138 | data = r[1:] 139 | 140 | self.app = app 141 | self.langcodes = pd.DataFrame(data) 142 | self.langcodes = pd.DataFrame(self.langcodes[0].str.split(",").values.tolist()).reset_index(drop=True) 143 | self.langcodes = self.langcodes[self.langcodes.columns[:-1]] 144 | self.langcodes.columns=names.split(",") 145 | self.stopWords = dict() 146 | self.stopWords["en"] = requests.get("https://raw.githubusercontent.com/LIAAD/yake/master/yake/StopwordsList/stopwords_{}.txt".format("en"), allow_redirects=True, stream=True, timeout=(1,5)).text.replace("\r","").split("\n") 147 | self.models = dict() 148 | self.languages = dict() 149 | self.threads = list() 150 | self.pendingBlocks = list() 151 | self.lastBatchSize = 100 152 | self.nbItems = 0 153 | 154 | self.keywords = list() 155 | 156 | goingOn = True 157 | 158 | try: 159 | os.mkdir("ExordeWD") 160 | except: 161 | pass 162 | 163 | trial = 0 164 | while (os.path.exists('ExordeWD\\lang.json') == False and trial < 3): 165 | downloadFile("QmQK6M7pum6W2ZRLdhgzEw7vH8GYMmvwR3aX3hFkMXWrus", "lang.json") 166 | with open('ExordeWD\\lang.json', "r") as file: 167 | self.lang_table = json.load(file)["langlist"] 168 | if (os.path.exists('ExordeWD\\lang.json') == False): 169 | goingOn = False 170 | self.listlang = self.lang_table 171 | 172 | def manage_scraping(self): 173 | sender = threading.Thread(target=self.manage_sending) 174 | 175 | sender.daemon = True 176 | sender.start() 177 | 178 | try: 179 | while 
True: 180 | 181 | try: 182 | 183 | threads = list() 184 | removal = list() 185 | 186 | for i in range(len(self.threads)): 187 | 188 | if(self.threads[i].is_alive() == True): 189 | #threads.append(self.threads[i]) 190 | removal.append(self.threads[i]) 191 | else: 192 | removal.append(self.threads[i]) 193 | 194 | self.threads = threads # threads is still empty here: every old handle was moved to removal and dropped below 195 | while(len(removal) > 0): 196 | del removal[0] 197 | 198 | keywords = [x.replace(" ","%20") for x in self.keywords] 199 | 200 | for target in ["4chan", "twitter", "reddit"]: 201 | 202 | # if(target == "4chan"): 203 | # x = threading.Thread(target=self.scrape, args=(target, keywords)) 204 | # x.start() 205 | # self.threads.append(x) 206 | 207 | if(target == "twitter"): 208 | x = threading.Thread(target=self.scrape, args=("twitter1", keywords)) 209 | x.daemon = True 210 | x.start() 211 | self.threads.append(x) 212 | y = threading.Thread(target=self.scrape, args=("twitter2", keywords)) 213 | y.daemon = True 214 | y.start() 215 | self.threads.append(y) 216 | 217 | if(target == "reddit"): 218 | x = threading.Thread(target=self.scrape, args=("reddit1", keywords)) 219 | x.daemon = True 220 | x.start() 221 | self.threads.append(x) 222 | y = threading.Thread(target=self.scrape, args=("reddit2", keywords)) 223 | y.daemon = True 224 | y.start() 225 | self.threads.append(y) 226 | z = threading.Thread(target=self.scrape, args=("reddit3", keywords)) 227 | z.daemon = True 228 | z.start() 229 | self.threads.append(z) 230 | a = threading.Thread(target=self.scrape, args=("reddit4", keywords)) 231 | a.daemon = True 232 | a.start() 233 | self.threads.append(a) 234 | 235 | if(len(self.keywords) > 0): 236 | time.sleep(0.5) 237 | try: 238 | delay = int(_contract.functions.get("autoScrapingFrequency").call()) 239 | if scrape_printing_enabled: 240 | print("[{}]\t{}\t{}\t{}".format(dt.now(),"SLEEP ",delay," s BEFORE NEW DATA COLLECT")) 241 | time.sleep(delay) 242 | except: 243 | random_wait_ = random.randint(3, 10)*60 244 | if scrape_printing_enabled: 245 | print("[{}]\t{}".format(dt.now(),"SLEEP BEFORE NEW DATA COLLECT: "),random_wait_," s") 246 | time.sleep(random_wait_) 247 | else: 248 | time.sleep(0.5) 249 | try: 250 | delay = int(_contract.functions.get("autoScrapingFrequency").call()) 251 | if scrape_printing_enabled: 252 | print("[{}]\t{}\t{}\t{}".format(dt.now(),"SLEEP ",delay," s BEFORE NEW DATA COLLECT")) 253 | time.sleep(delay) 254 | except: 255 | random_wait_ = random.randint(3, 10)*60 256 | if scrape_printing_enabled: 257 | print("[{}]\t{}".format(dt.now(),"SLEEP BEFORE NEW DATA COLLECT: "),random_wait_," s") 258 | time.sleep(random_wait_) 259 | except Exception as e: 260 | print("Error manage_scraping: ",e) 261 | pass 262 | except Exception as e: 263 | pass 264 | 265 | def scrape(self, target: str, keywords: list): 266 | 267 | try: 268 | results = dict() 269 | 270 | try: 271 | exd_token = random.choice(list(pd.DataFrame([x.strip().lower() for x in requests.get("https://raw.githubusercontent.com/exorde-labs/TestnetProtocol/main/targets/keywords.txt").text.replace("\n","").split(",") if x != ""])[0])) 272 | except: 273 | exd_token = "bitcoin" 274 | 275 | if scrape_printing_enabled: 276 | print("[{}]\t{}\t{}\t\t{}".format(dt.now(),"COLLECT DATA", "scrape", "KEYWORDS SELECTED = [{}]".format(exd_token))) 277 | if(exd_token not in keywords): 278 | keywords.append(exd_token) 279 | 280 | if(target == "4chan"): 281 | 282 | try: 283 | 284 | for p in range(1, 10): 285 | #print(p) 286 | for endpoint in [f"https://a.4cdn.org/biz/{p}.json", f"https://a.4cdn.org/news/{p}.json"]: 287 
| 288 | r = requests.get(endpoint) 289 | r = json.loads(r.text) 290 | 291 | for post in gen_chan(r): 292 | 293 | for key in post: 294 | 295 | threads = post[key][0] 296 | 297 | no = get_threads(threads,'no') 298 | time = get_threads(threads,'time') 299 | com = cleanhtml(html.unescape(get_threads(threads,'com'))) 300 | name = get_threads(threads,'name') 301 | ids = get_threads(threads,'id') 302 | filename = html.unescape(get_threads(threads,'filename')) + html.unescape(get_threads(threads,'ext')) 303 | replies = get_threads(threads,'replies') 304 | images = get_threads(threads,'images') 305 | url = re.search("(?Phttps?://[^\s]+)", get_threads(threads,'com')).group("url") if re.search("(?Phttps?://[^\s]+)", get_threads(threads,'com')) != None else None 306 | 307 | tr_post = dict() 308 | 309 | tr_post["internal_id"] = str(no) 310 | tr_post["internal_parent_id"] = None 311 | 312 | tr_post["keyword"] = "" 313 | tr_post["mediaType"] = "Social_Networks" 314 | tr_post["domainName"] = "4channel.org" 315 | tr_post["url"] = "https://boards.4channel.org/biz/thread/" + str(no) 316 | tr_post["author"] = name 317 | tr_post["authorLocation"] = "" 318 | tr_post["creationDateTime"] = dt.fromtimestamp(time,pytz.timezone('UTC')) 319 | if(tr_post["creationDateTime"] >= ( dt.now(pytz.timezone('UTC')) - timedelta(minutes=5))): 320 | tr_post["lang"] = detect(text=com.replace("\n",""), low_memory=False)["lang"] 321 | if(tr_post["lang"] in self.languages): 322 | self.languages[tr_post["lang"]] += 1 323 | else: 324 | self.languages[tr_post["lang"]] = 1 325 | tr_post["title"] = '' 326 | tr_post["description"] = '' 327 | tr_post["content"] = com.replace("\n","").replace("'","''") 328 | if(tr_post["content"] in ("","[removed]") and tr_post["title"] not in ("","[deleted]")): 329 | tr_post["content"] = tr_post["title"] 330 | tr_post["controversial"] = False 331 | tr_post["tokenOfInterest"] = list() 332 | max_ngram_size = 1 333 | deduplication_thresold = 0.9 334 | deduplication_algo = 'seqm' 335 | windowSize = 1 336 | numOfKeywords = 20 337 | 338 | kw_extractor = yake.KeywordExtractor(lan=tr_post["lang"], n=max_ngram_size, dedupLim=deduplication_thresold, dedupFunc=deduplication_algo, windowsSize=windowSize, top=numOfKeywords, features=None) 339 | kx = kw_extractor.extract_keywords(tr_post["content"]) 340 | 341 | for kw in kx: 342 | if(kw[0] not in tr_post["tokenOfInterest"]): 343 | tr_post["tokenOfInterest"].append(kw[0]) 344 | 345 | tr_post["reference"] = endpoint.split("/")[3] 346 | tr_post["link"] = None 347 | tr_post["is_video"] = None 348 | tr_post["nb_comments"] = replies 349 | tr_post["nb_shared"] = 0 350 | tr_post["nb_liked"] = 0 351 | tr_post["topics"] = list() 352 | tr_post["entities"] = list() 353 | tr_post["medias"] = list() 354 | tr_post["links"] = list() 355 | 356 | if(tr_post["url"] not in results and tr_post["content"] != 'NaN'): 357 | self.send_doc(tr_post) 358 | 359 | 360 | if 'last_replies' in threads: 361 | for comment in threads['last_replies']: 362 | 363 | com_com = cleanhtml(comment.get('com', 'NaN')) 364 | time_com = comment.get('time', 'NaN') 365 | fname_com = comment.get('filename', 'NaN') 366 | 367 | tr_post = dict() 368 | 369 | tr_post["internal_id"] = str(comment["no"]) 370 | tr_post["internal_parent_id"] = str(no) 371 | 372 | tr_post["keyword"] = "" 373 | tr_post["mediaType"] = "Social_Networks" 374 | tr_post["domainName"] = "4channel.org" 375 | tr_post["url"] = "https://boards.4channel.org/biz/thread/" + str(no) + "/" + str(comment["no"]) 376 | #print(tr_post["url"]) 377 | tr_post["author"] 
= name 378 | tr_post["authorLocation"] = "" 379 | tr_post["creationDateTime"] = dt.fromtimestamp(time_com,pytz.timezone('UTC')) 380 | tr_post["lang"] = detect(text=com_com.replace("\n",""), low_memory=False)["lang"] 381 | if(tr_post["lang"] in self.languages): 382 | self.languages[tr_post["lang"]] += 1 383 | else: 384 | self.languages[tr_post["lang"]] = 1 385 | tr_post["title"] = '' 386 | tr_post["description"] = '' 387 | tr_post["content"] = com_com.replace("\n","").replace("'","''") 388 | if(tr_post["content"] in ("","[removed]") and tr_post["title"] != ""): 389 | tr_post["content"] = tr_post["title"] 390 | 391 | tr_post["controversial"] = False 392 | tr_post["tokenOfInterest"] = list() 393 | max_ngram_size = 1 394 | deduplication_thresold = 0.9 395 | deduplication_algo = 'seqm' 396 | windowSize = 1 397 | numOfKeywords = 20 398 | 399 | kw_extractor = yake.KeywordExtractor(lan=tr_post["lang"], n=max_ngram_size, dedupLim=deduplication_thresold, dedupFunc=deduplication_algo, windowsSize=windowSize, top=numOfKeywords, features=None) 400 | kx = kw_extractor.extract_keywords(tr_post["content"]) 401 | 402 | for kw in kx: 403 | if(kw[0] not in tr_post["tokenOfInterest"]): 404 | tr_post["tokenOfInterest"].append(kw[0]) 405 | 406 | tr_post["reference"] = endpoint.split("/")[3] 407 | tr_post["link"] = None 408 | tr_post["is_video"] = None 409 | tr_post["nb_comments"] = 0 410 | tr_post["nb_shared"] = 0 411 | tr_post["nb_liked"] = 0 412 | tr_post["topics"] = list() 413 | tr_post["entities"] = list() 414 | tr_post["medias"] = list() 415 | tr_post["links"] = list() 416 | 417 | if(tr_post["url"] not in results): 418 | self.send_doc(tr_post) 419 | #results[tr_post["url"]] = tr_post 420 | #self.send_item(tr_post) 421 | 422 | 423 | except Exception as e: 424 | # print() 425 | # print(target, e) 426 | # print() 427 | pass 428 | 429 | if(target == "reddit1"): 430 | 431 | try: 432 | 433 | r = requests.get("https://api.pushshift.io/reddit/search/comment/?q="+"|".join(keywords)+"&after=5m").json() 434 | #r = requests.get("https://api.pushshift.io/reddit/search/comment/?q="+keyword+"&after=5m").json() 435 | posts = r["data"] 436 | 437 | 438 | for post in posts: 439 | break 440 | #try: 441 | tr_post = dict() 442 | 443 | tr_post["internal_id"] = str(post["id"]) 444 | tr_post["internal_parent_id"] = post["parent_id"] if post["parent_id"] != None else 0 445 | 446 | 447 | tr_post["domainName"] = "reddit.com" 448 | tr_post["mediaType"] = "Social_Networks" 449 | tr_post["url"] = "https://www.reddit.com" + post["permalink"] 450 | tr_post["author"] = post["author"] 451 | tr_post["authorLocation"] = "" 452 | tr_post["creationDateTime"] = dt.fromtimestamp(post["created_utc"],pytz.timezone('UTC')) 453 | tr_post["lang"] = detect(text=post["body"].replace("\n",""), low_memory=False)["lang"] 454 | if(tr_post["lang"] in self.languages): 455 | self.languages[tr_post["lang"]] += 1 456 | else: 457 | self.languages[tr_post["lang"]] = 1 458 | tr_post["title"] = '' 459 | tr_post["description"] = '' 460 | tr_post["content"] = cleanhtml(post["body"].replace("\n","").replace("'","''")) 461 | if(tr_post["content"] in ("","[removed]") and tr_post["title"] != ""): 462 | tr_post["content"] = tr_post["title"] 463 | 464 | subkeywords = [x for x in keywords if x in tr_post["content"]] 465 | tr_post["keyword"] = subkeywords[0] if len(subkeywords) != 0 else keywords[0] 466 | 467 | 468 | # tokens = cat_tokenizer(tr_post["content"][:500], return_tensors='pt') 469 | # output = cat_model(**tokens) 470 | # scores = output[0][0].detach().numpy() 471 | 
# scores = expit(scores) 472 | # predictions = (scores >= 0.5) * 1 473 | # cat_results = list() 474 | # for i in range(len(predictions)): 475 | # if predictions[i]: 476 | # try: 477 | # cat_results.append(cat_class_mapping[i]) 478 | # except: 479 | # pass 480 | # tr_post["categories"] = cat_results 481 | 482 | # try: 483 | # text = preprocess(tr_post["content"]) 484 | # encoded_input = ironizer(text, return_tensors='pt') 485 | # output = mdl_ironizer(**encoded_input) 486 | # scores = output[0][0].detach().numpy() 487 | # scores = softmax(scores) 488 | 489 | # if(scores[0] < scores[1]): 490 | # isIrony = True 491 | # else: 492 | # isIrony = False 493 | # tr_post["isIrony"] = isIrony 494 | # except: 495 | # text = preprocess(tr_post["content"][:500]) 496 | # encoded_input = ironizer(text, return_tensors='pt') 497 | # output = mdl_ironizer(**encoded_input) 498 | # scores = output[0][0].detach().numpy() 499 | # scores = softmax(scores) 500 | 501 | # if(scores[0] < scores[1]): 502 | # isIrony = True 503 | # else: 504 | # isIrony = False 505 | # tr_post["isIrony"] = isIrony 506 | 507 | # try: 508 | # tr_post["emotions"] = self.calc_emotions(tr_post["content"]) 509 | # except: 510 | # tr_post["emotions"] = self.calc_emotions(tr_post["content"][:500]) 511 | # if(len(tr_post["content"]) < 50): 512 | # tr_post["emotions"] = tr_post["emotions"].loc[:2] 513 | 514 | 515 | # try: 516 | # tr_post["sentiment"] = tr_post["emotions"].loc[0, "label"] 517 | # except: 518 | # tr_post["sentiment"] = tr_post["emotions"].loc[0, "label"] 519 | tr_post["controversial"] = False 520 | # tr_post["toxic"] = self.models["toxicity"][0].predict(self.models["toxicity"][1].transform([tr_post["content"]]))[0] 521 | # tr_post["censored"] = (self.models["censoring"][0].predict(self.models["censoring"][1].transform([tr_post["content"]]))[0] or self.models["censoring"][0].predict(self.models["censoring"][1].transform([tr_post["url"]]))[0]) 522 | 523 | tr_post["tokenOfInterest"] = list() 524 | max_ngram_size = 1 525 | deduplication_thresold = 0.9 526 | deduplication_algo = 'seqm' 527 | windowSize = 1 528 | numOfKeywords = 20 529 | 530 | kw_extractor = yake.KeywordExtractor(lan=tr_post["lang"], n=max_ngram_size, dedupLim=deduplication_thresold, dedupFunc=deduplication_algo, windowsSize=windowSize, top=numOfKeywords, features=None) 531 | kx = kw_extractor.extract_keywords(tr_post["content"]) 532 | 533 | for kw in kx: 534 | if(kw[0] not in tr_post["tokenOfInterest"]): 535 | tr_post["tokenOfInterest"].append(kw[0]) 536 | 537 | tr_post["reference"] = post["subreddit"] 538 | tr_post["link"] = None 539 | tr_post["is_video"] = None 540 | tr_post["nb_comments"] = 0 541 | tr_post["nb_shared"] = 0 542 | tr_post["nb_liked"] = 0 543 | tr_post["topics"] = list() 544 | tr_post["entities"] = list() 545 | tr_post["medias"] = list() 546 | tr_post["links"] = list() 547 | 548 | 549 | if(tr_post["url"] not in results): 550 | #results[tr_post["url"]] = tr_post 551 | self.send_doc(tr_post) 552 | #self.send_item(tr_post) 553 | 554 | except Exception as e: 555 | # print() 556 | # print(target, e) 557 | # print() 558 | pass 559 | 560 | if(target == "reddit2"): 561 | 562 | r = requests.get("https://api.pushshift.io/reddit/search/submission/?q="+"|".join(keywords)+"&after=5m").json() 563 | posts = r["data"] 564 | 565 | for post in posts: 566 | try: 567 | tr_post = dict() 568 | 569 | tr_post["internal_id"] = str(post["id"]) 570 | tr_post["internal_parent_id"] = 0 571 | 572 | tr_post["domainName"] = "reddit.com" 573 | tr_post["mediaType"] = "Social_Networks" 
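# ---- Hedged illustration (not part of the original file): each scraping
# target below rebuilds the same YAKE extractor (n=1, dedupLim=0.9,
# dedupFunc='seqm', windowsSize=1, top=20) before filling
# tr_post["tokenOfInterest"]. A shared helper with a hypothetical name
# could express that configuration once:
def _tokens_of_interest(text, lang):
    # Mirrors the extraction parameters used inline throughout scrape():
    # single-word keywords, top 20, deduplicated at 0.9 with 'seqm'.
    extractor = yake.KeywordExtractor(lan=lang, n=1, dedupLim=0.9,
                                      dedupFunc='seqm', windowsSize=1,
                                      top=20, features=None)
    return [kw for kw, _score in extractor.extract_keywords(text)]
# Usage sketch:
# tr_post["tokenOfInterest"] = _tokens_of_interest(tr_post["content"], tr_post["lang"])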
574 | tr_post["url"] = post["full_link"] 575 | tr_post["author"] = post["author"] 576 | tr_post["authorLocation"] = "" 577 | tr_post["creationDateTime"] = dt.fromtimestamp(post["created_utc"],pytz.timezone('UTC')) 578 | tr_post["lang"] = detect(text=post["selftext"].replace("\n",""), low_memory=False)["lang"] if "selftext" in post else detect(text=post["titile"].replace("\n",""), low_memory=False)["lang"] 579 | if(tr_post["lang"] in self.languages): 580 | self.languages[tr_post["lang"]] += 1 581 | else: 582 | self.languages[tr_post["lang"]] = 1 583 | tr_post["title"] = post["title"] 584 | tr_post["description"] = '' 585 | tr_post["content"] = cleanhtml(post["selftext"].replace("\n","").replace("'","''")) if "selftext" in post else cleanhtml(post["title"].replace("\n","").replace("'","''")) 586 | if(tr_post["content"] in ("","[removed]") and tr_post["title"] != ""): 587 | tr_post["content"] = tr_post["title"] 588 | 589 | subkeywords = [x for x in keywords if x in tr_post["content"]] 590 | tr_post["keyword"] = subkeywords[0] if len(subkeywords) != 0 else keywords[0] 591 | tr_post["controversial"] = False 592 | tr_post["tokenOfInterest"] = list() 593 | max_ngram_size = 1 594 | deduplication_thresold = 0.9 595 | deduplication_algo = 'seqm' 596 | windowSize = 1 597 | numOfKeywords = 20 598 | 599 | kw_extractor = yake.KeywordExtractor(lan=tr_post["lang"], n=max_ngram_size, dedupLim=deduplication_thresold, dedupFunc=deduplication_algo, windowsSize=windowSize, top=numOfKeywords, features=None) 600 | kx = kw_extractor.extract_keywords(tr_post["content"]) 601 | 602 | for kw in kx: 603 | if(kw[0] not in tr_post["tokenOfInterest"]): 604 | tr_post["tokenOfInterest"].append(kw[0]) 605 | 606 | tr_post["reference"] = post["subreddit"] 607 | tr_post["link"] = post["url"] 608 | tr_post["is_video"] = post["is_video"] 609 | tr_post["nb_comments"] = 0 610 | tr_post["nb_shared"] = 0 611 | tr_post["nb_liked"] = 0 612 | tr_post["topics"] = list() 613 | tr_post["entities"] = list() 614 | tr_post["medias"] = list() 615 | tr_post["links"] = list() 616 | 617 | if(tr_post["url"] not in results): 618 | self.send_doc(tr_post) 619 | except Exception as e: 620 | pass 621 | 622 | if(target == "reddit3"): 623 | 624 | r = requests.get("https://api.pushshift.io/reddit/search/comment/?subreddit="+"|".join(keywords)+"&after=5m").json() 625 | #r = requests.get(f'https://api.pushshift.io/reddit/search/comment/?subreddit='+keyword+"&after=5m").json() 626 | posts = r["data"] 627 | 628 | for post in posts: 629 | try: 630 | tr_post = dict() 631 | 632 | tr_post["internal_id"] = str(post["id"]) 633 | tr_post["internal_parent_id"] = post["parent_id"] if post["parent_id"] != None else 0 634 | 635 | tr_post["domainName"] = "reddit.com" 636 | tr_post["mediaType"] = "Social_Networks" 637 | tr_post["url"] = "https://www.reddit.com" + post["permalink"] 638 | tr_post["author"] = post["author"] 639 | tr_post["authorLocation"] = "" 640 | tr_post["creationDateTime"] = dt.fromtimestamp(post["created_utc"],pytz.timezone('UTC')) 641 | tr_post["lang"] = detect(text=post["body"].replace("\n",""), low_memory=False)["lang"] 642 | if(tr_post["lang"] in self.languages): 643 | self.languages[tr_post["lang"]] += 1 644 | else: 645 | self.languages[tr_post["lang"]] = 1 646 | tr_post["title"] = '' 647 | tr_post["description"] = '' 648 | tr_post["content"] = cleanhtml(post["body"].replace("\n","").replace("'","''")) 649 | if(tr_post["content"] in ("","[removed]") and tr_post["title"] != ""): 650 | tr_post["content"] = tr_post["title"] 651 | 652 | subkeywords = 
                if (target == "reddit3"):

                    r = requests.get("https://api.pushshift.io/reddit/search/comment/?subreddit=" + "|".join(keywords) + "&after=5m").json()
                    posts = r["data"]

                    for post in posts:
                        try:
                            tr_post = dict()

                            tr_post["internal_id"] = str(post["id"])
                            tr_post["internal_parent_id"] = post["parent_id"] if post["parent_id"] is not None else 0

                            tr_post["domainName"] = "reddit.com"
                            tr_post["mediaType"] = "Social_Networks"
                            tr_post["url"] = "https://www.reddit.com" + post["permalink"]
                            tr_post["author"] = post["author"]
                            tr_post["authorLocation"] = ""
                            tr_post["creationDateTime"] = dt.fromtimestamp(post["created_utc"], pytz.timezone('UTC'))
                            tr_post["lang"] = detect(text=post["body"].replace("\n", ""), low_memory=False)["lang"]
                            if tr_post["lang"] in self.languages:
                                self.languages[tr_post["lang"]] += 1
                            else:
                                self.languages[tr_post["lang"]] = 1
                            tr_post["title"] = ''
                            tr_post["description"] = ''
                            tr_post["content"] = cleanhtml(post["body"].replace("\n", "").replace("'", "''"))
                            if tr_post["content"] in ("", "[removed]") and tr_post["title"] != "":
                                tr_post["content"] = tr_post["title"]

                            subkeywords = [x for x in keywords if x in tr_post["content"]]
                            tr_post["keyword"] = subkeywords[0] if len(subkeywords) != 0 else keywords[0]

                            tr_post["controversial"] = False
                            tr_post["tokenOfInterest"] = list()
                            max_ngram_size = 1
                            deduplication_threshold = 0.9
                            deduplication_algo = 'seqm'
                            windowSize = 1
                            numOfKeywords = 20

                            kw_extractor = yake.KeywordExtractor(lan=tr_post["lang"], n=max_ngram_size, dedupLim=deduplication_threshold, dedupFunc=deduplication_algo, windowsSize=windowSize, top=numOfKeywords, features=None)
                            kx = kw_extractor.extract_keywords(tr_post["content"])

                            for kw in kx:
                                if kw[0] not in tr_post["tokenOfInterest"]:
                                    tr_post["tokenOfInterest"].append(kw[0])

                            tr_post["reference"] = post["subreddit"]
                            tr_post["link"] = "https://www.reddit.com" + post["permalink"]
                            tr_post["is_video"] = post["is_video"] if "is_video" in post else None
                            tr_post["nb_comments"] = 0
                            tr_post["nb_shared"] = 0
                            tr_post["nb_liked"] = 0
                            tr_post["topics"] = list()
                            tr_post["entities"] = list()
                            tr_post["medias"] = list()
                            tr_post["links"] = list()

                            if tr_post["url"] not in results:
                                self.send_doc(tr_post)
                        except Exception:
                            pass
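
                # --- reddit4: Pushshift submission search -----------------------------
                # Same normalization as reddit3, but against the submission endpoint;
                # submissions expose "selftext"/"title" instead of a comment-style
                # "body" field, and usually carry no parent_id.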
post["is_video"] if "is_video" in post else None 738 | tr_post["nb_comments"] = 0 739 | tr_post["nb_shared"] = 0 740 | tr_post["nb_liked"] = 0 741 | tr_post["topics"] = list() 742 | tr_post["entities"] = list() 743 | tr_post["medias"] = list() 744 | tr_post["links"] = list() 745 | 746 | if(tr_post["url"] not in results): 747 | self.send_doc(tr_post) 748 | except Exception as e: 749 | pass 750 | 751 | if(target == "instagram"): 752 | 753 | pass 754 | 755 | if (target == "twitter1"): 756 | 757 | d= dt.now(pytz.timezone('UTC')) - timedelta(minutes=5) 758 | 759 | try: 760 | c = 0 761 | for keyword in keywords: 762 | 763 | rng_top = random.randint(1, 100) 764 | top_selected = False 765 | if rng_top < 10: 766 | top_selected = True 767 | 768 | today = date.today() 769 | 770 | for i, _post in enumerate(snscrape.modules.twitter.TwitterSearchScraper('{} since:{}'.format(keyword, today), top = top_selected).get_items()): 771 | post = _post.__dict__ 772 | 773 | tr_post = dict() 774 | c += 1 775 | if c > 100: 776 | break 777 | 778 | tr_post["internal_id"] = str(post["id"]) 779 | tr_post["internal_parent_id"] = post["inReplyToTweetId"] #post["referenced_tweets"][0]["id"] if "referenced_tweets" in post and len(post["referenced_tweet"]) != 0 and post["referenced_tweets"][0]["id"] != None else 0 780 | 781 | 782 | tr_post["keyword"] = keyword 783 | tr_post["mediaType"] = "Social_Networks" 784 | tr_post["domainName"] = "twitter.com" 785 | tr_post["url"] = "https://twitter.com/ExordeLabs/status/{}".format(post["id"]) 786 | tr_post["author"] = post["user"].displayname 787 | tr_post["authorLocation"] = post["user"].location 788 | tr_post["creationDateTime"] = post["date"] #parse(post["date"]).replace(tzinfo=pytz.timezone('UTC')) 789 | tr_post["lang"] = post["lang"] 790 | if(tr_post["lang"] in self.languages): 791 | self.languages[tr_post["lang"]] += 1 792 | else: 793 | self.languages[tr_post["lang"]] = 1 794 | tr_post["title"] = '' #post["title"] if "title" in post else None 795 | tr_post["description"] = '' #post["annotations"]["description"] if "annotations" in post and "description" in post["annotations"] else '' 796 | tr_post["content"] = cleanhtml(post["renderedContent"].replace("\n","").replace("'","''")) 797 | if(tr_post["content"] in ("","[removed]") and tr_post["title"] != ""): 798 | tr_post["content"] = tr_post["title"] 799 | 800 | tr_post["controversial"] = False 801 | tr_post["tokenOfInterest"] = list() 802 | max_ngram_size = 1 803 | deduplication_thresold = 0.9 804 | deduplication_algo = 'seqm' 805 | windowSize = 1 806 | numOfKeywords = 20 807 | 808 | if detailed_validation_printing_enabled: 809 | print("Tweet found = ",tr_post["internal_id"], tr_post["creationDateTime"] ) 810 | 811 | kw_extractor = yake.KeywordExtractor(lan=tr_post["lang"], n=max_ngram_size, dedupLim=deduplication_thresold, dedupFunc=deduplication_algo, windowsSize=windowSize, top=numOfKeywords, features=None) 812 | kx = kw_extractor.extract_keywords(tr_post["content"]) 813 | 814 | for kw in kx: 815 | if(kw[0] not in tr_post["tokenOfInterest"]): 816 | tr_post["tokenOfInterest"].append(kw[0]) 817 | 818 | 819 | tr_post["reference"] = '' 820 | tr_post["links"] = list() 821 | if("outlinks" in post and type(post["outlinks"]) != None): 822 | try: 823 | for j in range(len(post["outlinks"])): 824 | tr_post["links"].append(post["outlinks"][i]) 825 | except: 826 | pass 827 | if("tcooutlinks" in post and type(post["tcooutlinks"]) != None): 828 | try: 829 | for j in range(len(post["tcooutlinks"])): 830 | 
tr_post["links"].append(post["tcooutlinks"][i]) 831 | except: 832 | pass 833 | tr_post["is_video"] = None #post["is_video"] if "is_video" in post else '' 834 | tr_post["nb_comments"] = post["replyCount"] 835 | tr_post["nb_shared"] = post["retweetCount"] 836 | tr_post["nb_liked"] = post["likeCount"] 837 | tr_post["topics"] = post["hashtags"] 838 | tr_post["entities"] = list() 839 | tr_post["medias"] = list() 840 | 841 | if("context_annotation" in post): 842 | for lvl0 in post["context_annotations"]: 843 | 844 | tr_post["topics"].append({"superclass":lvl0["domain"]["name"], 845 | "superdesc":lvl0["domain"]["description"], 846 | "class":lvl0["entity"]["name"], 847 | "desc":lvl0["domain"]["description"],}) 848 | 849 | tr_post["mentions"] = list() 850 | if("entities" in post): 851 | for entType in post["entities"]: 852 | 853 | if(entType == "hashtags"): 854 | for i in range(len(post["entities"]["hashtags"])): 855 | tr_post["mentions"].append(post["entities"]["hashtags"][i]["tag"]) 856 | 857 | if(entType == "mentions"): 858 | for i in range(len(post["entities"]["mentions"])): 859 | tr_post["mentions"].append(post["entities"]["mentions"][i]["username"]) 860 | 861 | if(entType == "urls"): 862 | for i in range(len(post["entities"]["urls"])): 863 | tr_post["links"].append(post["entities"]["urls"][i]["expanded_url"]) 864 | 865 | if(entType == "unwound_url"): 866 | tr_post["links"].append(post["entities"]["unwound_url"]) 867 | 868 | if(entType == "annotations"): 869 | for i in range(len(post["entities"]["annotations"])): 870 | annot = post["entities"]["annotations"][i] 871 | neo_annot = {"type":annot["type"], 872 | "name":annot["normalized_text"], 873 | "proba":annot["probability"]} 874 | tr_post["entities"].append(neo_annot) 875 | 876 | if(entType == "images"): 877 | for i in range(len(post["entities"]["images"])): 878 | img = post["entities"]["urls"][i] 879 | neo_img = {"type":"img", 880 | "url":img["url"]} 881 | tr_post["medias"].append(neo_img) 882 | 883 | 884 | if(tr_post["url"] not in results): 885 | self.send_doc(tr_post) 886 | 887 | except Exception as e: 888 | print("ERROR: ",e) 889 | pass 890 | 891 | if (target == "twitter2"): 892 | 893 | try: 894 | d= dt.now(pytz.timezone('UTC')) - timedelta(minutes=5) 895 | 896 | c = 0 897 | for keyword in keywords: 898 | 899 | postList = [_post.__dict__ for i, _post in enumerate(snscrape.modules.twitter.TwitterHashtagScraper(keyword + ' since_time:{}'.format(int(d.timestamp()))).get_items()) if _post.__dict__["date"].timestamp() >= d.timestamp()] 900 | 901 | for post in postList: 902 | 903 | c += 1 904 | if c > 100: 905 | break 906 | 907 | if(post["date"].timestamp() > d.timestamp()): 908 | 909 | tr_post = dict() 910 | 911 | tr_post["internal_id"] = str(post["id"]) 912 | tr_post["internal_parent_id"] = post["inReplyToTweetId"] #post["referenced_tweets"][0]["id"] if "referenced_tweets" in post and len(post["referenced_tweet"]) != 0 and post["referenced_tweets"][0]["id"] != None else 0 913 | 914 | tr_post["keyword"] = keyword 915 | tr_post["mediaType"] = "Social_Networks" 916 | tr_post["domainName"] = "twitter.com" 917 | tr_post["url"] = "https://twitter.com/ExordeLabs/status/{}".format(post["id"]) 918 | tr_post["author"] = post["user"].displayname 919 | tr_post["authorLocation"] = post["user"].location 920 | tr_post["creationDateTime"] = post["date"] #parse(post["date"]).replace(tzinfo=pytz.timezone('UTC')) 921 | tr_post["lang"] = post["lang"] 922 | 923 | if detailed_validation_printing_enabled: 924 | print("Tweet found = ",tr_post["internal_id"], 
tr_post["creationDateTime"] ) 925 | 926 | if(tr_post["lang"] in self.languages): 927 | self.languages[tr_post["lang"]] += 1 928 | else: 929 | self.languages[tr_post["lang"]] = 1 930 | tr_post["title"] = '' #post["title"] if "title" in post else None 931 | tr_post["description"] = '' #post["annotations"]["description"] if "annotations" in post and "description" in post["annotations"] else '' 932 | tr_post["content"] = cleanhtml(post["renderedContent"].replace("\n","").replace("'","''")) 933 | if(tr_post["content"] in ("","[removed]") and tr_post["title"] != ""): 934 | tr_post["content"] = tr_post["title"] 935 | tr_post["controversial"] = False 936 | tr_post["tokenOfInterest"] = list() 937 | max_ngram_size = 1 938 | deduplication_thresold = 0.9 939 | deduplication_algo = 'seqm' 940 | windowSize = 1 941 | numOfKeywords = 20 942 | 943 | kw_extractor = yake.KeywordExtractor(lan=tr_post["lang"], n=max_ngram_size, dedupLim=deduplication_thresold, dedupFunc=deduplication_algo, windowsSize=windowSize, top=numOfKeywords, features=None) 944 | kx = kw_extractor.extract_keywords(tr_post["content"]) 945 | 946 | for kw in kx: 947 | if(kw[0] not in tr_post["tokenOfInterest"]): 948 | tr_post["tokenOfInterest"].append(kw[0]) 949 | 950 | 951 | tr_post["reference"] = '' 952 | tr_post["links"] = list() 953 | if("outlinks" in post and type(post["outlinks"]) != None): 954 | try: 955 | for j in range(len(post["outlinks"])): 956 | tr_post["links"].append(post["outlinks"][i]) 957 | except: 958 | pass 959 | if("tcooutlinks" in post and type(post["tcooutlinks"]) != None): 960 | try: 961 | for j in range(len(post["tcooutlinks"])): 962 | tr_post["links"].append(post["tcooutlinks"][i]) 963 | except: 964 | pass 965 | tr_post["is_video"] = None 966 | tr_post["nb_comments"] = post["replyCount"] 967 | tr_post["nb_shared"] = post["retweetCount"] 968 | tr_post["nb_liked"] = post["likeCount"] 969 | tr_post["topics"] = post["hashtags"] 970 | tr_post["entities"] = list() 971 | tr_post["medias"] = list() 972 | 973 | if("context_annotation" in post): 974 | for lvl0 in post["context_annotations"]: 975 | 976 | tr_post["topics"].append({"superclass":lvl0["domain"]["name"], 977 | "superdesc":lvl0["domain"]["description"], 978 | "class":lvl0["entity"]["name"], 979 | "desc":lvl0["domain"]["description"],}) 980 | 981 | tr_post["mentions"] = list() 982 | if("entities" in post): 983 | for entType in post["entities"]: 984 | 985 | if(entType == "hashtags"): 986 | for i in range(len(post["entities"]["hashtags"])): 987 | tr_post["mentions"].append(post["entities"]["hashtags"][i]["tag"]) 988 | 989 | if(entType == "mentions"): 990 | for i in range(len(post["entities"]["mentions"])): 991 | tr_post["mentions"].append(post["entities"]["mentions"][i]["username"]) 992 | 993 | if(entType == "urls"): 994 | for i in range(len(post["entities"]["urls"])): 995 | tr_post["links"].append(post["entities"]["urls"][i]["expanded_url"]) 996 | 997 | if(entType == "unwound_url"): 998 | tr_post["links"].append(post["entities"]["unwound_url"]) 999 | 1000 | if(entType == "annotations"): 1001 | for i in range(len(post["entities"]["annotations"])): 1002 | annot = post["entities"]["annotations"][i] 1003 | neo_annot = {"type":annot["type"], 1004 | "name":annot["normalized_text"], 1005 | "proba":annot["probability"]} 1006 | tr_post["entities"].append(neo_annot) 1007 | 1008 | if(entType == "images"): 1009 | for i in range(len(post["entities"]["images"])): 1010 | img = post["entities"]["urls"][i] 1011 | neo_img = {"type":"img", 1012 | "url":img["url"]} 1013 | 
tr_post["medias"].append(neo_img) 1014 | 1015 | 1016 | if(tr_post["url"] not in results): 1017 | self.send_doc(tr_post) 1018 | except Exception as e: 1019 | print("ERROR: ",e) 1020 | pass 1021 | 1022 | sys.exit() 1023 | except Exception as e: 1024 | #print("scraping", e) 1025 | sys.exit() 1026 | 1027 | def manage_sending(self): 1028 | 1029 | _contract = self.app.cm.instantiateContract("ConfigRegistry") 1030 | 1031 | while True: 1032 | try: 1033 | try: 1034 | time.sleep(0.5) 1035 | batchSize = int(_contract.functions.get("_ModuleMinSpotBatchSize").call()) 1036 | self.lastBatchSize = batchSize 1037 | except Exception as e: 1038 | batchSize = self.lastBatchSize 1039 | 1040 | if(len(self.pendingBlocks) >= batchSize): 1041 | 1042 | tmp = self.pendingBlocks[:batchSize] 1043 | 1044 | 1045 | res = None 1046 | 1047 | for i in range(3): 1048 | time.sleep(0.5) 1049 | try: 1050 | res = ipfs_pin_upload(json.dumps({"Content":tmp}, indent=4, sort_keys=True, default=str)) 1051 | time.sleep(7.5) 1052 | break 1053 | except Exception as e: 1054 | print("[Spotting] ipfs_pin_upload error: ",e) 1055 | res = None 1056 | domNames = [x["item"]["DomainName"] for x in self.pendingBlocks[:batchSize]][0] 1057 | if(res != None): 1058 | 1059 | contract = self.app.cm.instantiateContract("DataSpotting") 1060 | increment_tx = contract.functions.SpotData([res], [domNames], [batchSize], 'Hi Bob!').buildTransaction( 1061 | { 1062 | 'nonce': w3.eth.get_transaction_count(self.app.localconfig["ExordeApp"]["ERCAddress"]), 1063 | 'from': self.app.localconfig["ExordeApp"]["ERCAddress"], 1064 | 'gasPrice': w3.eth.gas_price 1065 | }) 1066 | if validation_printing_enabled: 1067 | print("Putting SpotData tx in the WaitingRoom") 1068 | self.app.tm.waitingRoom.put((increment_tx, self.app.localconfig["ExordeApp"]["ERCAddress"], self.app.pKey)) 1069 | #print("File sent", res) 1070 | else: 1071 | print("[Spotting] Failed to pin file to IPFS network") 1072 | self.pendingBlocks = self.pendingBlocks[batchSize:] 1073 | 1074 | except Exception as e: 1075 | pass 1076 | 1077 | 1078 | 1079 | 1080 | def send_doc(self, doc): 1081 | 1082 | try: 1083 | document = dict() 1084 | 1085 | tr_item = dict() 1086 | tr_item["CreationDateTime"] = doc["creationDateTime"] 1087 | tr_item["Language"] = doc["lang"] 1088 | tr_item["Url"] = doc["url"] 1089 | tr_item["Author"] = doc["author"] 1090 | tr_item["Title"] = doc["title"] 1091 | tr_item["Description"] = doc["description"].replace("'", "''") 1092 | tr_item["Content"] = doc["content"].replace('"','\"') 1093 | #tr_item["Sentiment"] = doc["sentiment"] 1094 | tr_item["Controversial"] = doc["controversial"] 1095 | # tr_item["Toxic"] = doc["toxic"] 1096 | # tr_item["Censored"] = doc["censored"] 1097 | tr_item["Reference"] = doc["reference"] 1098 | tr_item["nbComments"] = doc["nb_comments"] 1099 | tr_item["nbShared"] = doc["nb_shared"] 1100 | tr_item["nbLiked"] = doc["nb_liked"] 1101 | tr_item["DomainName"] = doc["domainName"] 1102 | #tr_item["isIrony"] = doc["isIrony"] 1103 | tr_item["internal_id"] = doc["internal_id"] 1104 | tr_item["internal_parent_id"] = doc["internal_parent_id"] 1105 | tr_item["mediaType"] = doc["mediaType"] 1106 | 1107 | 1108 | document["item"] = tr_item 1109 | document["keyword"] = doc["keyword"] 1110 | #document["categories"] = doc["categories"] 1111 | document["links"] = doc["links"] 1112 | document["entities"] = doc["entities"] 1113 | document["medias"] = doc["medias"] 1114 | document["tokenOfInterest"] = doc["tokenOfInterest"] 1115 | 1116 | localization_found = True 1117 | try: 1118 | if 
    def send_doc(self, doc):

        try:
            document = dict()

            tr_item = dict()
            tr_item["CreationDateTime"] = doc["creationDateTime"]
            tr_item["Language"] = doc["lang"]
            tr_item["Url"] = doc["url"]
            tr_item["Author"] = doc["author"]
            tr_item["Title"] = doc["title"]
            tr_item["Description"] = doc["description"].replace("'", "''")
            tr_item["Content"] = doc["content"].replace('"', '\\"')  # escape double quotes before serialization
            tr_item["Controversial"] = doc["controversial"]
            tr_item["Reference"] = doc["reference"]
            tr_item["nbComments"] = doc["nb_comments"]
            tr_item["nbShared"] = doc["nb_shared"]
            tr_item["nbLiked"] = doc["nb_liked"]
            tr_item["DomainName"] = doc["domainName"]
            tr_item["internal_id"] = doc["internal_id"]
            tr_item["internal_parent_id"] = doc["internal_parent_id"]
            tr_item["mediaType"] = doc["mediaType"]
            # optional fields (Sentiment, Toxic, Censored, isIrony, categories) are not populated yet

            document["item"] = tr_item
            document["keyword"] = doc["keyword"]
            document["links"] = doc["links"]
            document["entities"] = doc["entities"]
            document["medias"] = doc["medias"]
            document["tokenOfInterest"] = doc["tokenOfInterest"]

            # report the spotter's country unless localization has been disabled
            localization_found = True
            try:
                if 'localization_enabled' in locals() or 'localization_enabled' in globals():
                    localization_found = localization_enabled
            except Exception:
                localization_found = True

            if localization_found:
                document["spotterCountry"] = self.app.userCountry
            else:
                document["spotterCountry"] = ""

            self.pendingBlocks.append(document)
            self.nbItems += 1

        except Exception:
            pass

--------------------------------------------------------------------------------