├── src ├── accounts │ ├── __init__.py │ ├── migrations │ │ ├── __init__.py │ │ └── 0001_initial.py │ ├── tests.py │ ├── apps.py │ ├── templates │ │ └── accounts │ │ │ ├── register_done.html │ │ │ ├── login.html │ │ │ ├── register.html │ │ │ └── update.html │ ├── urls.py │ ├── models.py │ ├── forms.py │ ├── admin.py │ └── views.py ├── scraping_service │ ├── __init__.py │ ├── settings │ │ ├── __init__.py │ │ └── production.py │ ├── views.py │ ├── asgi.py │ ├── wsgi.py │ └── urls.py ├── scraping │ ├── migrations │ │ ├── __init__.py │ │ ├── 0002_error.py │ │ ├── 0003_url.py │ │ └── 0001_initial.py │ ├── __init__.py │ ├── tests.py │ ├── apps.py │ ├── admin.py │ ├── templates │ │ └── scraping │ │ │ ├── delete.html │ │ │ ├── detail.html │ │ │ ├── create.html │ │ │ ├── home.html │ │ │ └── list.html │ ├── utils.py │ ├── forms.py │ ├── models.py │ ├── views.py │ └── parsers.py ├── Procfile ├── static │ ├── favicon.ico │ ├── fonts │ │ └── WendyOne-Regular.ttf │ └── css │ │ └── main.css ├── templates │ ├── 403.html │ ├── 500.html │ ├── 400.html │ ├── 404.html │ └── base.html ├── requirements.txt ├── manage.py ├── Django_templates_tags.txt ├── mydata.json ├── run_scraping.py ├── send_emails.py └── .gitignore └── README.md /src/accounts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/scraping_service/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/accounts/migrations/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/scraping/migrations/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /src/Procfile: -------------------------------------------------------------------------------- 1 | web: gunicorn scraping_service.wsgi --log-file – -------------------------------------------------------------------------------- /src/scraping/__init__.py: -------------------------------------------------------------------------------- 1 | default_app_config = 'scraping.apps.ScrapingConfig' 2 | -------------------------------------------------------------------------------- /src/accounts/tests.py: -------------------------------------------------------------------------------- 1 | from django.test import TestCase 2 | 3 | # Create your tests here. 4 | -------------------------------------------------------------------------------- /src/scraping/tests.py: -------------------------------------------------------------------------------- 1 | from django.test import TestCase 2 | 3 | # Create your tests here. 4 | -------------------------------------------------------------------------------- /src/static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olegJF/scraping_service/HEAD/src/static/favicon.ico -------------------------------------------------------------------------------- /src/accounts/apps.py: -------------------------------------------------------------------------------- 1 | from django.apps import AppConfig 2 | 3 | 4 | class AccountsConfig(AppConfig): 5 | name = 'accounts' 6 | -------------------------------------------------------------------------------- /src/static/fonts/WendyOne-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olegJF/scraping_service/HEAD/src/static/fonts/WendyOne-Regular.ttf -------------------------------------------------------------------------------- /src/scraping_service/settings/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .production import * 2 | try: 3 | from .local_settings import * 4 | except ImportError: 5 | pass 6 | -------------------------------------------------------------------------------- /src/scraping/apps.py: -------------------------------------------------------------------------------- 1 | from django.apps import AppConfig 2 | 3 | 4 | class ScrapingConfig(AppConfig): 5 | name = 'scraping' 6 | verbose_name = 'Приложение по сбору вакансий' 7 | -------------------------------------------------------------------------------- /src/static/css/main.css: -------------------------------------------------------------------------------- 1 | @font-face { 2 | font-family: WendyOne; 3 | src: url(../fonts/WendyOne-Regular.ttf); 4 | } 5 | #brand { 6 | font-family: WendyOne; 7 | color: #FFF; 8 | } 9 | -------------------------------------------------------------------------------- /src/scraping/admin.py: -------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | from .models import City, Language, Vacancy, Error, Url 3 | 4 | admin.site.register(City) 5 | admin.site.register(Language) 6 | admin.site.register(Vacancy) 7 | admin.site.register(Error) 8 | admin.site.register(Url) 9 | -------------------------------------------------------------------------------- /src/scraping_service/views.py: -------------------------------------------------------------------------------- 1 | from django.shortcuts import render 2 | import datetime 3 | 4 | 5 | def home(request): 6 | date = datetime.datetime.now().date() 7 | name = 'Dave' 8 | _context = {'date': date, 'name': name} 9 | return render(request, 'home.html', _context) 10 | -------------------------------------------------------------------------------- /src/templates/403.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 
| {% block title %} 403 {% endblock %} 4 | 5 | {% block content %} 6 | 7 |
8 |
9 |

Ошибка 403. Доступ запрещен.

10 | 11 |
12 |
13 | 14 | {% endblock %} 15 | -------------------------------------------------------------------------------- /src/templates/500.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block title %} 500 {% endblock %} 4 | 5 | {% block content %} 6 | 7 |
8 |
9 |

Ошибка 500. Ошибка сервера.

10 | 11 |
12 |
13 | 14 | {% endblock %} 15 | -------------------------------------------------------------------------------- /src/templates/400.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block title %} 400 {% endblock %} 4 | 5 | {% block content %} 6 | 7 |
8 |
9 |

Ошибка 400. Неправильный запрос.

10 | 11 |
12 |
13 | 14 | {% endblock %} 15 | -------------------------------------------------------------------------------- /src/templates/404.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block title %} 404 {% endblock %} 4 | 5 | {% block content %} 6 | 7 |
8 |
9 |

Ошибка 404. Страница не найдена.

10 | 11 |
12 |
13 | 14 | {% endblock %} 15 | -------------------------------------------------------------------------------- /src/requirements.txt: -------------------------------------------------------------------------------- 1 | asgiref==3.2.3 2 | beautifulsoup4==4.8.2 3 | certifi==2019.11.28 4 | chardet==3.0.4 5 | dj-database-url==0.5.0 6 | Django==3.1.13 7 | django-jsonfield==1.4.0 8 | gunicorn==20.0.4 9 | idna==2.9 10 | psycopg2-binary==2.8.5 11 | pytz==2019.3 12 | requests==2.23.0 13 | six==1.14.0 14 | soupsieve==2.0 15 | sqlparse==0.3.1 16 | urllib3==1.26.5 17 | whitenoise==5.0.1 18 | -------------------------------------------------------------------------------- /src/scraping/templates/scraping/delete.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block title %} Delete {% endblock %} 4 | 5 | {% block content %} 6 | 7 |
8 |
9 |
{% csrf_token %} 10 |

{{object}}

11 | 12 |
13 |
14 |
15 | 16 | {% endblock %} 17 | -------------------------------------------------------------------------------- /src/scraping_service/asgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | ASGI config for scraping_service project. 3 | 4 | It exposes the ASGI callable as a module-level variable named ``application``. 5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/3.0/howto/deployment/asgi/ 8 | """ 9 | 10 | import os 11 | 12 | from django.core.asgi import get_asgi_application 13 | 14 | os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'scraping_service.settings') 15 | 16 | application = get_asgi_application() 17 | -------------------------------------------------------------------------------- /src/scraping_service/wsgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | WSGI config for scraping_service project. 3 | 4 | It exposes the WSGI callable as a module-level variable named ``application``. 5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/3.0/howto/deployment/wsgi/ 8 | """ 9 | 10 | import os 11 | 12 | from django.core.wsgi import get_wsgi_application 13 | 14 | os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'scraping_service.settings') 15 | 16 | application = get_wsgi_application() 17 | -------------------------------------------------------------------------------- /src/accounts/templates/accounts/register_done.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block title %} Register {% endblock %} 4 | 5 | {% block content %} 6 | 7 |
8 |
9 |

Приветствуем нового пользователя {{ new_user.email}}

10 |

Вы зарегистрировались в системе. 11 | Теперь Вы можете войти. 12 |

13 | 14 |
15 |
16 | 17 | {% endblock %} 18 | -------------------------------------------------------------------------------- /src/accounts/urls.py: -------------------------------------------------------------------------------- 1 | from django.urls import path 2 | from accounts.views import ( 3 | login_view, logout_view, register_view, update_view, delete_view, contact 4 | ) 5 | 6 | urlpatterns = [ 7 | path('login/', login_view, name='login'), 8 | path('logout/', logout_view, name='logout'), 9 | path('register/', register_view, name='register'), 10 | path('update/', update_view, name='update'), 11 | path('delete/', delete_view, name='delete'), 12 | path('contact/', contact, name='contact'), 13 | ] 14 | -------------------------------------------------------------------------------- /src/accounts/templates/accounts/login.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block title %} Login {% endblock %} 4 | 5 | {% block content %} 6 | 7 |
8 |
9 |

Форма входа

10 |
{% csrf_token %} 11 |
12 | {{ form.as_p }} 13 |
14 | 15 |
16 |
17 |
18 | 19 | {% endblock %} 20 | -------------------------------------------------------------------------------- /src/accounts/templates/accounts/register.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block title %} Register {% endblock %} 4 | 5 | {% block content %} 6 | 7 |
8 |
9 |

Регистрация нового пользователя

10 |
{% csrf_token %} 11 |
12 | {{ form.as_p }} 13 |
14 | 15 |
16 |
17 |
18 | 19 | {% endblock %} 20 | -------------------------------------------------------------------------------- /src/scraping/templates/scraping/detail.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block title %} Job Finder {% endblock %} 4 | 5 | {% block content %} 6 | 7 |
8 |
{{ object.title }}
9 |
10 |

{{ object.description }}

11 |

{{ object.company }}

12 |

{{object.city.name }} | {{object.language.name }} | 13 | {{object.timestamp|date:"d-m-Y" }}

14 | 15 |
16 |
17 | 18 | 19 | 20 | 21 | {% endblock %} 22 | -------------------------------------------------------------------------------- /src/scraping/templates/scraping/create.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block title %} Job Finder {% endblock %} 4 | 5 | {% block content %} 6 |
7 |
8 |
9 |
10 |

11 |

{% csrf_token %} 12 | {{form.as_p}} 13 | 14 |
15 |

16 |
17 |
18 |
19 |
20 | 21 | 22 | 23 | 24 | 25 | 26 | {% endblock %} 27 | -------------------------------------------------------------------------------- /src/scraping/migrations/0002_error.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.0.3 on 2020-03-29 10:23 2 | 3 | from django.db import migrations, models 4 | import jsonfield.fields 5 | 6 | 7 | class Migration(migrations.Migration): 8 | 9 | dependencies = [ 10 | ('scraping', '0001_initial'), 11 | ] 12 | 13 | operations = [ 14 | migrations.CreateModel( 15 | name='Error', 16 | fields=[ 17 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 18 | ('timestamp', models.DateField(auto_now_add=True)), 19 | ('data', jsonfield.fields.JSONField(default=dict)), 20 | ], 21 | ), 22 | ] 23 | -------------------------------------------------------------------------------- /src/manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Django's command-line utility for administrative tasks.""" 3 | import os 4 | import sys 5 | 6 | 7 | def main(): 8 | os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'scraping_service.settings') 9 | try: 10 | from django.core.management import execute_from_command_line 11 | except ImportError as exc: 12 | raise ImportError( 13 | "Couldn't import Django. Are you sure it's installed and " 14 | "available on your PYTHONPATH environment variable? Did you " 15 | "forget to activate a virtual environment?" 
16 | ) from exc 17 | execute_from_command_line(sys.argv) 18 | 19 | 20 | if __name__ == '__main__': 21 | main() 22 | -------------------------------------------------------------------------------- /src/Django_templates_tags.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | block {% block %} / {% endblock %} 4 | 5 | {% block $BLOCK$ %} 6 | $END$ 7 | {% endblock $BLOCK$%} 8 | 9 | ============================== 10 | 11 | for {% for %} / {% endfor %} 12 | 13 | {% for $VAR$ in $COLLECTION$ %} 14 | $END$ 15 | {% endfor %} 16 | 17 | $VAR$ "obj" 18 | $COLLECTION$ "object_list" 19 | 20 | ============================ 21 | 22 | if {% if %} / {% endif %} 23 | 24 | {% if $VAR$ %} 25 | $END$ 26 | {% elif %}$END$ 27 | {% else %}$END$ 28 | {% endif %} 29 | 30 | ============================ 31 | 32 | csrf {% csrf_token %} 33 | 34 | {% csrf_token %}$END$ 35 | 36 | ============================== 37 | q {{ }} 38 | {{ }} $END$ 39 | 40 | ============================== 41 | 42 | qw {% %} 43 | {% %} $END$ 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /src/scraping/utils.py: -------------------------------------------------------------------------------- 1 | cyrillic_letters = { 2 | u'а': u'a', 3 | u'б': u'b', 4 | u'в': u'v', 5 | u'г': u'g', 6 | u'д': u'd', 7 | u'е': u'e', 8 | u'ё': u'e', 9 | u'ж': u'zh', 10 | u'з': u'z', 11 | u'и': u'i', 12 | u'й': u'y', 13 | u'к': u'k', 14 | u'л': u'l', 15 | u'м': u'm', 16 | u'н': u'n', 17 | u'о': u'o', 18 | u'п': u'p', 19 | u'р': u'r', 20 | u'с': u's', 21 | u'т': u't', 22 | u'у': u'u', 23 | u'ф': u'f', 24 | u'х': u'h', 25 | u'ц': u'ts', 26 | u'ч': u'ch', 27 | u'ш': u'sh', 28 | u'щ': u'sch', 29 | u'ъ': u'', 30 | u'ы': u'y', 31 | u'ь': u'', 32 | u'э': u'e', 33 | u'ю': u'yu', 34 | u'я': u'ya' 35 | } 36 | 37 | 38 | def from_cyrillic_to_eng(text: str): 39 | text = text.replace(' ', '_').lower() 40 | tmp = '' 41 | for ch in text: 42 | tmp += 
cyrillic_letters.get(ch, ch) 43 | return tmp 44 | -------------------------------------------------------------------------------- /src/mydata.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "model": "scraping.city", 4 | "pk": 1, 5 | "fields": { 6 | "name": "\u041a\u0438\u0435\u0432", 7 | "slug": "kiev" 8 | } 9 | }, 10 | { 11 | "model": "scraping.city", 12 | "pk": 3, 13 | "fields": { 14 | "name": "\u041e\u0434\u0435\u0441\u0441\u0430", 15 | "slug": "odessa" 16 | } 17 | }, 18 | { 19 | "model": "scraping.language", 20 | "pk": 1, 21 | "fields": { 22 | "name": "Python", 23 | "slug": "python" 24 | } 25 | }, 26 | { 27 | "model": "scraping.language", 28 | "pk": 2, 29 | "fields": { 30 | "name": "Java", 31 | "slug": "java" 32 | } 33 | }, 34 | { 35 | "model": "scraping.url", 36 | "pk": 1, 37 | "fields": { 38 | "city": 1, 39 | "language": 1, 40 | "url_data": { 41 | "work": "https://www.work.ua/ru/jobs-kyiv-python/", 42 | "rabota": "https://rabota.ua/zapros/python/%d0%ba%d0%b8%d0%b5%d0%b2", 43 | "dou": "https://jobs.dou.ua/vacancies/?city=%D0%9A%D0%B8%D0%B5%D0%B2&category=Python", 44 | "djinni": "https://djinni.co/jobs/?location=%D0%9A%D0%B8%D0%B5%D0%B2&primary_keyword=Python" 45 | } 46 | } 47 | } 48 | ] 49 | -------------------------------------------------------------------------------- /src/scraping/migrations/0003_url.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.0.3 on 2020-04-05 09:25 2 | 3 | from django.db import migrations, models 4 | import django.db.models.deletion 5 | import jsonfield.fields 6 | import scraping.models 7 | 8 | 9 | class Migration(migrations.Migration): 10 | 11 | dependencies = [ 12 | ('scraping', '0002_error'), 13 | ] 14 | 15 | operations = [ 16 | migrations.CreateModel( 17 | name='Url', 18 | fields=[ 19 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 20 | ('url_data', 
jsonfield.fields.JSONField(default=scraping.models.default_urls)), 21 | ('city', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='scraping.City', verbose_name='Город')), 22 | ('language', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='scraping.Language', verbose_name='Язык программирования')), 23 | ], 24 | options={ 25 | 'unique_together': {('city', 'language')}, 26 | }, 27 | ), 28 | ] 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # scraping_service 2 | 3 | 4 | ### Сайт на Django 3. Сервис по сбору данных. 5 | Построение полноценного сайта-сервиса, который собирает данные о  вакансиях с сайтов по поиску работы и рассылает их подписчикам. 6 | Подписчики сервиса регистрируются выбирая город и язык программирования. 7 | Раз в сутки, происходит отбор всех подписчиков, которые хотят получать письма с вакансиями и на основе их предпочтений (какой город и какой ЯП), формируется список урлов, по которым происходит запуск парсеров для сбора вакансий по этим параметрам. После того, как парсеры отработают, запускается отправка писем тем, кто хочет получать рассылку. 8 | 9 | Вы узнаете, каким образом можно получать данные с сайтов , как их сохранять в БД, как создать веб-приложение на основе  Django и многое другое. 10 | А после того, как приложение уже будет готово, Вы узнаете, как его можно разместить в интернете так, чтобы оно было доступно для всех. 11 | 12 | Основная целевая аудитория курса - начинающие программисты, желающие получить знания по внутреннему устройству Django. 
13 | [Обучающий видеокурс на ресурсе Udemy.com](https://www.udemy.com/course/site-on-django-3/) 14 | -------------------------------------------------------------------------------- /src/scraping/templates/scraping/home.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block title %} Home {% endblock %} 4 | 5 | {% block content %} 6 |
7 |

Сервис по сбору и рассылке вакансий

8 |
9 |
10 |
11 |
12 |

13 | Каждый день мы анализируем сайты с вакансиями, согласно настройкам наших подписчиков. 14 | Все подходящие вакансии, которые были опубликованы на этих сайтах за последние сутки, 15 | записываются в базу данных и рассылаются подписчикам, согласно их предпочтениям. 16 | Вы можете просматривать актуальные вакансии, просто выбрав необходимый город или специальность. 17 | Или же, пройдя несложную регистрацию, 18 | получать каждый день письмо со списком новых вакансий за последние сутки, согласно Вашим потребностям. 19 |

20 |
21 |
22 | {{ form.as_p }} 23 |
24 | 25 |
26 |
27 |
28 | 29 | {% endblock %} 30 | -------------------------------------------------------------------------------- /src/scraping_service/urls.py: -------------------------------------------------------------------------------- 1 | """scraping_service URL Configuration 2 | 3 | The `urlpatterns` list routes URLs to views. For more information please see: 4 | https://docs.djangoproject.com/en/3.0/topics/http/urls/ 5 | Examples: 6 | Function views 7 | 1. Add an import: from my_app import views 8 | 2. Add a URL to urlpatterns: path('', views.home, name='home') 9 | Class-based views 10 | 1. Add an import: from other_app.views import Home 11 | 2. Add a URL to urlpatterns: path('', Home.as_view(), name='home') 12 | Including another URLconf 13 | 1. Import the include() function: from django.urls import include, path 14 | 2. Add a URL to urlpatterns: path('blog/', include('blog.urls')) 15 | """ 16 | from django.contrib import admin 17 | from django.urls import path, include 18 | from scraping.views import ( 19 | home_view, list_view, v_detail, VDetail, VList, VCreate, VUpdate, VDelete 20 | ) 21 | 22 | urlpatterns = [ 23 | path('admin/', admin.site.urls), 24 | path('list/', list_view, name='list'), 25 | # path('list/', VList.as_view(), name='list'), 26 | path('accounts/', include(('accounts.urls', 'accounts'))), 27 | path('detail//', VDetail.as_view(), name='detail'), 28 | path('create/', VCreate.as_view(), name='create'), 29 | path('update//', VUpdate.as_view(), name='update'), 30 | path('delete//', VDelete.as_view(), name='delete'), 31 | path('', home_view, name='home'), 32 | ] 33 | -------------------------------------------------------------------------------- /src/accounts/migrations/0001_initial.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.0.3 on 2020-03-22 10:07 2 | 3 | from django.db import migrations, models 4 | import django.db.models.deletion 5 | 6 | 7 | class Migration(migrations.Migration): 8 | 9 | 
initial = True 10 | 11 | dependencies = [ 12 | ('scraping', '0001_initial'), 13 | ] 14 | 15 | operations = [ 16 | migrations.CreateModel( 17 | name='MyUser', 18 | fields=[ 19 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 20 | ('password', models.CharField(max_length=128, verbose_name='password')), 21 | ('last_login', models.DateTimeField(blank=True, null=True, verbose_name='last login')), 22 | ('email', models.EmailField(max_length=255, unique=True, verbose_name='email address')), 23 | ('is_active', models.BooleanField(default=True)), 24 | ('is_admin', models.BooleanField(default=False)), 25 | ('send_email', models.BooleanField(default=True)), 26 | ('city', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='scraping.City')), 27 | ('language', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='scraping.Language')), 28 | ], 29 | options={ 30 | 'abstract': False, 31 | }, 32 | ), 33 | ] 34 | -------------------------------------------------------------------------------- /src/scraping/forms.py: -------------------------------------------------------------------------------- 1 | from django import forms 2 | 3 | from scraping.models import City, Language, Vacancy 4 | 5 | 6 | class FindForm(forms.Form): 7 | city = forms.ModelChoiceField( 8 | queryset=City.objects.all(), to_field_name="slug", required=False, 9 | widget=forms.Select(attrs={'class': 'form-control'}), 10 | label='Город' 11 | ) 12 | language = forms.ModelChoiceField( 13 | queryset=Language.objects.all(), to_field_name="slug", required=False, 14 | widget=forms.Select(attrs={'class': 'form-control'}), 15 | label='Специальность' 16 | ) 17 | 18 | 19 | class VForm(forms.ModelForm): 20 | city = forms.ModelChoiceField( 21 | queryset=City.objects.all(), 22 | widget=forms.Select(attrs={'class': 'form-control'}), 23 | label='Город' 24 | ) 25 | language = forms.ModelChoiceField( 26 | 
queryset=Language.objects.all(), 27 | widget=forms.Select(attrs={'class': 'form-control'}), 28 | label='Специальность' 29 | ) 30 | url = forms.CharField(label='URL', widget=forms.URLInput( 31 | attrs={'class': 'form-control'})) 32 | title = forms.CharField(label='Заголовок вакансии', widget=forms.TextInput( 33 | attrs={'class': 'form-control'})) 34 | company = forms.CharField(label='Компания', widget=forms.TextInput( 35 | attrs={'class': 'form-control'})) 36 | description = forms.CharField(label='Описание вакансии', 37 | widget=forms.Textarea( 38 | attrs={'class': 'form-control'})) 39 | 40 | class Meta: 41 | model = Vacancy 42 | fields = '__all__' 43 | -------------------------------------------------------------------------------- /src/accounts/templates/accounts/update.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block title %} Register {% endblock %} 4 | 5 | {% block content %} 6 | 7 |
8 |
9 |

Настройки

10 |
{% csrf_token %} 11 |
12 | {{ form.as_p }} 13 |
14 | 15 |
16 |
{% csrf_token %} 17 | 18 |
19 |
20 |
21 | 22 |
23 |
24 |

Если Вы не нашли подходящий набор Город, Специальность, 25 | Вы можете заполнить форму ниже с Вашими пожеланиями. После того, 26 | как необходимые данные будут добавлены, Вы получите письмо на указанный Вами и-мэйл.

27 |

28 | 34 |

35 |
36 |
37 |
{% csrf_token %} 38 |
39 | {{ contact_form.as_p }} 40 |
41 | 42 |
43 |
44 |
45 | 46 | 47 |
48 |
49 | {% endblock %} 50 | -------------------------------------------------------------------------------- /src/accounts/models.py: -------------------------------------------------------------------------------- 1 | from django.contrib.auth.base_user import AbstractBaseUser, BaseUserManager 2 | from django.db import models 3 | 4 | 5 | class MyUserManager(BaseUserManager): 6 | def create_user(self, email, password=None): 7 | """ 8 | Creates and saves a User with the given email, date of 9 | birth and password. 10 | """ 11 | if not email: 12 | raise ValueError('Users must have an email address') 13 | 14 | user = self.model( 15 | email=self.normalize_email(email) 16 | ) 17 | 18 | user.set_password(password) 19 | user.save(using=self._db) 20 | return user 21 | 22 | def create_superuser(self, email, password=None): 23 | """ 24 | Creates and saves a superuser with the given email, date of 25 | birth and password. 26 | """ 27 | user = self.create_user( 28 | email, 29 | password=password 30 | ) 31 | user.is_admin = True 32 | user.save(using=self._db) 33 | return user 34 | 35 | 36 | class MyUser(AbstractBaseUser): 37 | email = models.EmailField( 38 | verbose_name='email address', 39 | max_length=255, 40 | unique=True, 41 | ) 42 | is_active = models.BooleanField(default=True) 43 | is_admin = models.BooleanField(default=False) 44 | city = models.ForeignKey('scraping.City', on_delete=models.SET_NULL, 45 | null=True, blank=True) 46 | language = models.ForeignKey('scraping.Language', on_delete=models.SET_NULL, 47 | null=True, blank=True) 48 | send_email = models.BooleanField(default=True) 49 | 50 | objects = MyUserManager() 51 | 52 | USERNAME_FIELD = 'email' 53 | REQUIRED_FIELDS = [] 54 | 55 | def __str__(self): 56 | return self.email 57 | 58 | def has_perm(self, perm, obj=None): 59 | "Does the user have a specific permission?" 
60 | # Simplest possible answer: Yes, always 61 | return True 62 | 63 | def has_module_perms(self, app_label): 64 | "Does the user have permissions to view the app `app_label`?" 65 | # Simplest possible answer: Yes, always 66 | return True 67 | 68 | @property 69 | def is_staff(self): 70 | "Is the user a member of staff?" 71 | # Simplest possible answer: All admins are staff 72 | return self.is_admin 73 | -------------------------------------------------------------------------------- /src/scraping/migrations/0001_initial.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.0.3 on 2020-03-22 10:07 2 | 3 | from django.db import migrations, models 4 | import django.db.models.deletion 5 | 6 | 7 | class Migration(migrations.Migration): 8 | 9 | initial = True 10 | 11 | dependencies = [ 12 | ] 13 | 14 | operations = [ 15 | migrations.CreateModel( 16 | name='City', 17 | fields=[ 18 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 19 | ('name', models.CharField(max_length=50, unique=True, verbose_name='Название населенного пункта')), 20 | ('slug', models.CharField(blank=True, max_length=50, unique=True)), 21 | ], 22 | options={ 23 | 'verbose_name': 'Название населенного пункта', 24 | 'verbose_name_plural': 'Название населенных пунктов', 25 | }, 26 | ), 27 | migrations.CreateModel( 28 | name='Language', 29 | fields=[ 30 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 31 | ('name', models.CharField(max_length=50, unique=True, verbose_name='Язык программирования')), 32 | ('slug', models.CharField(blank=True, max_length=50, unique=True)), 33 | ], 34 | options={ 35 | 'verbose_name': 'Язык программирования', 36 | 'verbose_name_plural': 'Языки программирования', 37 | }, 38 | ), 39 | migrations.CreateModel( 40 | name='Vacancy', 41 | fields=[ 42 | ('id', models.AutoField(auto_created=True, primary_key=True, 
"""Run every configured parser and persist the scraped vacancies.

Meant to be executed as a standalone script from the directory that
contains ``manage.py``. Steps:

1. collect the (city, language) pairs of users subscribed to the mailing;
2. look up the source URLs stored for those pairs;
3. run all parsers concurrently in the default thread-pool executor;
4. save the vacancies and record today's scraping errors;
5. delete vacancies that are ten or more days old.
"""
import asyncio
import codecs
import os
import sys
import datetime as dt

from django.contrib.auth import get_user_model
from django.db import DatabaseError

# Make the project importable and point Django at its settings before any
# model is imported.
proj = os.path.dirname(os.path.abspath('manage.py'))
sys.path.append(proj)
os.environ["DJANGO_SETTINGS_MODULE"] = "scraping_service.settings"

import django
django.setup()

from scraping.parsers import work, dou, djinni, rabota
from scraping.models import Vacancy, Error, Url

User = get_user_model()

# (parser callable, key of its URL inside ``Url.url_data``)
parsers = (
    (work, 'work'),
    (dou, 'dou'),
    (djinni, 'djinni'),
    (rabota, 'rabota')
)
# Accumulators filled by ``main`` while the parsers run.
jobs, errors = [], []


def get_settings():
    """Return the set of (city_id, language_id) pairs of subscribed users."""
    qs = User.objects.filter(send_email=True).values()
    return {(q['city_id'], q['language_id']) for q in qs}


def get_urls(_settings):
    """Return the URL sets stored for the requested (city, language) pairs.

    :param _settings: iterable of (city_id, language_id) tuples.
    :return: list of dicts with the keys ``city``, ``language`` and
        ``url_data``; pairs without stored URL data are left out, so every
        returned dict is guaranteed to carry ``url_data``.
    """
    qs = Url.objects.all().values()
    url_dict = {(q['city_id'], q['language_id']): q['url_data'] for q in qs}
    urls = []
    for pair in _settings:
        url_data = url_dict.get(pair)
        if url_data:
            urls.append({'city': pair[0], 'language': pair[1],
                         'url_data': url_data})
    return urls


async def main(value):
    """Run one blocking parser in the executor and collect its output.

    :param value: tuple ``(func, url, city, language)``.

    A parser that raises is recorded in ``errors`` instead of being lost,
    so one broken site cannot hide the results of the others.
    """
    func, url, city, language = value
    # Inside a coroutine this returns the loop that is currently running.
    loop = asyncio.get_event_loop()
    try:
        job, err = await loop.run_in_executor(None, func, url, city, language)
    except Exception as exc:
        errors.append({'url': url,
                       'title': f'{func.__name__} failed: {exc!r}'})
        return
    errors.extend(err)
    jobs.extend(job)


async def _run_all(tasks):
    """Await ``main`` for every task tuple concurrently."""
    await asyncio.gather(*(main(task) for task in tasks))


settings = get_settings()
url_list = get_urls(settings)

# ``.get(key, '')`` keeps a partially filled url_data from raising KeyError;
# an empty URL is what ``default_urls`` stores for an unconfigured site.
tmp_tasks = [(func, data['url_data'].get(key, ''), data['city'], data['language'])
             for data in url_list
             for func, key in parsers]

if tmp_tasks:
    loop = asyncio.new_event_loop()
    try:
        loop.run_until_complete(_run_all(tmp_tasks))
    finally:
        loop.close()

for job in jobs:
    v = Vacancy(**job)
    try:
        v.save()
    except DatabaseError:
        # Deliberate best effort: ``Vacancy.url`` is unique, so a vacancy
        # that was already stored is simply skipped.
        pass

if errors:
    err = Error.objects.filter(timestamp=dt.date.today()).first()
    if err is not None:
        # Rows written by older versions of this script may hold a plain
        # string; keep it under ``raw`` and continue with a dict.
        data = err.data if isinstance(err.data, dict) else {'raw': err.data}
        data.update({'errors': errors})
        err.data = data
        err.save()
    else:
        # Must be a dict: readers call ``error.data.get('errors')``.
        Error(data={'errors': errors}).save()

ten_days_ago = dt.date.today() - dt.timedelta(10)
Vacancy.objects.filter(timestamp__lte=ten_days_ago).delete()
class City(models.Model):
    """A settlement that vacancies and URL sets are attached to."""
    name = models.CharField(max_length=50,
                            verbose_name='Название населенного пункта',
                            unique=True)
    # Used in query-string filters (``city__slug``); filled in by ``save``
    # when left blank.
    slug = models.CharField(max_length=50, blank=True, unique=True)

    class Meta:
        verbose_name = 'Название населенного пункта'
        verbose_name_plural = 'Название населенных пунктов'

    def __str__(self):
        return self.name

    def save(self, *args, **kwargs):
        """Derive the slug from ``name`` when none was given, then save."""
        if not self.slug:
            # presumably transliterates Cyrillic to Latin — confirm in
            # scraping.utils
            self.slug = from_cyrillic_to_eng(str(self.name))
        super().save(*args, **kwargs)


class Language(models.Model):
    """A programming language that vacancies and URL sets are attached to."""
    name = models.CharField(max_length=50,
                            verbose_name='Язык программирования',
                            unique=True)
    # Used in query-string filters (``language__slug``); filled in by
    # ``save`` when left blank.
    slug = models.CharField(max_length=50, blank=True, unique=True)

    class Meta:
        verbose_name = 'Язык программирования'
        verbose_name_plural = 'Языки программирования'

    def __str__(self):
        return self.name

    def save(self, *args, **kwargs):
        """Derive the slug from ``name`` when none was given, then save."""
        if not self.slug:
            self.slug = from_cyrillic_to_eng(str(self.name))
        super().save(*args, **kwargs)


class Vacancy(models.Model):
    """A single scraped job posting."""
    # Unique, so saving the same posting twice raises a database error.
    url = models.URLField(unique=True)
    title = models.CharField(max_length=250, verbose_name='Заголовок вакансии')
    company = models.CharField(max_length=250, verbose_name='Компания')
    description = models.TextField(verbose_name='Описание вакансии')
    city = models.ForeignKey('City', on_delete=models.CASCADE,
                             verbose_name='Город', related_name='vacancies')
    language = models.ForeignKey('Language', on_delete=models.CASCADE,
                                 verbose_name='Язык программирования')
    # Date the row was created; set automatically.
    timestamp = models.DateField(auto_now_add=True)

    class Meta:
        verbose_name = 'Вакансия'
        verbose_name_plural = 'Вакансии'
        ordering = ['-timestamp']  # newest first

    def __str__(self):
        return self.title


class Error(models.Model):
    """Per-day JSON record; callers look rows up by ``timestamp``."""
    timestamp = models.DateField(auto_now_add=True)
    # Callers read and write the keys ``errors`` and ``user_data``.
    data = jsonfield.JSONField()

    def __str__(self):
        return str(self.timestamp)


class Url(models.Model):
    """The set of source URLs for one (city, language) pair."""
    city = models.ForeignKey('City', on_delete=models.CASCADE,
                             verbose_name='Город')
    language = models.ForeignKey('Language', on_delete=models.CASCADE,
                                 verbose_name='Язык программирования')
    # Default comes from ``default_urls``: one empty URL per site key.
    url_data = jsonfield.JSONField(default=default_urls)

    class Meta:
        unique_together = ("city", "language")
"""Forms of the accounts app: login, registration, profile and contact."""
from django import forms
from django.contrib.auth import get_user_model, authenticate
from django.contrib.auth.hashers import check_password

from scraping.models import City, Language

User = get_user_model()


class UserLoginForm(forms.Form):
    """E-mail/password login form that validates the credentials itself."""
    email = forms.EmailField(widget=forms.EmailInput(attrs={'class': 'form-control'}))
    password = forms.CharField(widget=forms.PasswordInput(attrs={'class': 'form-control'}))

    def clean(self, *args, **kwargs):
        """Check that the account exists, the password matches and the
        user can be authenticated.

        :raises forms.ValidationError: with a message naming the failed check.
        """
        # A field that failed its own validation is absent from
        # ``cleaned_data``; fall back to '' instead of calling .strip()
        # on None.
        email = (self.cleaned_data.get('email') or '').strip()
        password = (self.cleaned_data.get('password') or '').strip()

        if email and password:
            user = User.objects.filter(email=email).first()
            if user is None:
                raise forms.ValidationError('Такого пользователя нет!')
            if not check_password(password, user.password):
                raise forms.ValidationError('Пароль не верный!')
            # The password is known to be right at this point, so a failed
            # authenticate() is reported as a disabled account.
            if not authenticate(email=email, password=password):
                raise forms.ValidationError('Данный аккаунт отключен')
        return super(UserLoginForm, self).clean(*args, **kwargs)


class UserRegistrationForm(forms.ModelForm):
    """Registration form: e-mail plus a password entered twice."""
    email = forms.EmailField(label='Введите имэйл',
                             widget=forms.EmailInput(attrs={'class': 'form-control'}))
    password = forms.CharField(label='Введите пароль',
                               widget=forms.PasswordInput(attrs={'class': 'form-control'}))
    password2 = forms.CharField(label='Введите пароль ещё раз',
                                widget=forms.PasswordInput(attrs={'class': 'form-control'}))

    class Meta:
        model = User
        fields = ('email',)

    def clean_password2(self):
        """Return the repeated password after checking that both match.

        :raises forms.ValidationError: when the two entries differ.
        """
        data = self.cleaned_data
        # ``password`` is missing from cleaned_data when it failed its own
        # validation; .get() avoids a KeyError and still reports a mismatch.
        if data.get('password') != data.get('password2'):
            raise forms.ValidationError('Пароли не совпадают!')
        return data['password2']


class UserUpdateForm(forms.Form):
    """Profile form: preferred city, language and mailing subscription."""
    city = forms.ModelChoiceField(
        queryset=City.objects.all(), to_field_name="slug", required=True,
        widget=forms.Select(attrs={'class': 'form-control'}),
        label='Город'
    )
    language = forms.ModelChoiceField(
        queryset=Language.objects.all(), to_field_name="slug", required=True,
        widget=forms.Select(attrs={'class': 'form-control'}),
        label='Специальность'
    )
    send_email = forms.BooleanField(required=False, widget=forms.CheckboxInput,
                                    label='Получать рассылку?')

    # NOTE(review): ``Meta`` has no effect on a plain ``forms.Form``; it is
    # kept only so the class attributes stay unchanged.
    class Meta:
        model = User
        fields = ('city', 'language', 'send_email')


class ContactForm(forms.Form):
    """Free-text request for a city/language pair, with a reply address."""
    city = forms.CharField(
        required=True, widget=forms.TextInput(attrs={'class': 'form-control'}),
        label='Город'
    )
    language = forms.CharField(
        required=True, widget=forms.TextInput(attrs={'class': 'form-control'}),
        label='Специальность'
    )
    email = forms.EmailField(
        label='Введите имэйл', required=True, widget=forms.EmailInput(
            attrs={'class': 'form-control'})
    )
class UserCreationForm(forms.ModelForm):
    """A form for creating new users. Includes all the required
    fields, plus a repeated password."""
    password1 = forms.CharField(label='Password', widget=forms.PasswordInput)
    password2 = forms.CharField(label='Password confirmation', widget=forms.PasswordInput)

    class Meta:
        model = MyUser
        fields = ('email', )

    def clean_password2(self):
        """Return ``password2`` after checking it equals ``password1``."""
        # Check that the two password entries match
        password1 = self.cleaned_data.get("password1")
        password2 = self.cleaned_data.get("password2")
        if password1 and password2 and password1 != password2:
            raise forms.ValidationError("Passwords don't match")
        return password2

    def save(self, commit=True):
        """Create the user with a hashed password; persist when ``commit``."""
        # Save the provided password in hashed format
        user = super().save(commit=False)
        user.set_password(self.cleaned_data["password1"])
        if commit:
            user.save()
        return user


class UserChangeForm(forms.ModelForm):
    """A form for updating users. Includes all the fields on
    the user, but replaces the password field with admin's
    password hash display field.
    """
    password = ReadOnlyPasswordHashField()

    class Meta:
        model = MyUser
        fields = ('email', 'password', 'is_active', 'is_admin')

    def clean_password(self):
        """Return the stored password hash, ignoring submitted data."""
        # Regardless of what the user provides, return the initial value.
        # This is done here, rather than on the field, because the
        # field does not have access to the initial value
        return self.initial["password"]


class UserAdmin(BaseUserAdmin):
    """Admin configuration for ``MyUser``."""
    # The forms to add and change user instances
    form = UserChangeForm
    add_form = UserCreationForm

    # The fields to be used in displaying the User model.
    # These override the definitions on the base UserAdmin
    # that reference specific fields on auth.User.
    list_display = ('email', 'is_admin', 'city', 'language', 'send_email')
    list_filter = ('is_admin',)
    fieldsets = (
        (None, {'fields': ('email', 'password')}),
        ('Vacancy', {'fields': ('city', 'language', 'send_email')}),
        ('Permissions', {'fields': ('is_admin',)}),
    )
    # add_fieldsets is not a standard ModelAdmin attribute. UserAdmin
    # overrides get_fieldsets to use this attribute when creating a user.
    add_fieldsets = (
        (None, {
            'classes': ('wide',),
            'fields': ('email', 'password1', 'password2'),
        }),
    )
    search_fields = ('email',)
    ordering = ('email',)
    filter_horizontal = ()


# Now register the new UserAdmin...
admin.site.register(MyUser, UserAdmin)
# ... and, since we're not using Django's built-in permissions,
# unregister the Group model from admin.
admin.site.unregister(Group)
from django.contrib import messages
from django.core.paginator import Paginator
from django.shortcuts import render, get_object_or_404
from django.urls import reverse_lazy
from django.views.generic import DetailView, ListView, CreateView, UpdateView, \
    DeleteView

from .forms import FindForm, VForm
from .models import Vacancy


def home_view(request):
    """Render the landing page with an empty search form."""
    return render(request, 'scraping/home.html', {'form': FindForm()})


def list_view(request):
    """Render vacancies filtered by the ``city``/``language`` GET params.

    ``object_list`` (one page of 10 vacancies) is added to the context only
    when at least one of the two filters was supplied.
    """
    search_form = FindForm()
    city = request.GET.get('city')
    language = request.GET.get('language')
    context = {'city': city, 'language': language, 'form': search_form}

    # Keep only the lookups whose value was actually supplied.
    lookups = {
        field: value
        for field, value in (('city__slug', city), ('language__slug', language))
        if value
    }
    if lookups:
        vacancies = Vacancy.objects.filter(**lookups).select_related('city', 'language')
        pages = Paginator(vacancies, 10)  # 10 vacancies per page
        context['object_list'] = pages.get_page(request.GET.get('page'))
    return render(request, 'scraping/list.html', context)


def v_detail(request, pk=None):
    """Render one vacancy, answering 404 when ``pk`` does not exist."""
    vacancy = get_object_or_404(Vacancy, pk=pk)
    return render(request, 'scraping/detail.html', {'object': vacancy})


class VDetail(DetailView):
    """Class-based equivalent of ``v_detail``."""
    queryset = Vacancy.objects.all()
    template_name = 'scraping/detail.html'


class VList(ListView):
    """Class-based equivalent of ``list_view``."""
    model = Vacancy
    template_name = 'scraping/list.html'
    form = FindForm()
    paginate_by = 10

    def get_context_data(self, **kwargs):
        """Add the active filters and the search form to the context."""
        context = super().get_context_data(**kwargs)
        params = self.request.GET
        context['city'] = params.get('city')
        context['language'] = params.get('language')
        context['form'] = self.form
        return context

    def get_queryset(self):
        """Return the filtered vacancies, or an empty list without filters."""
        params = self.request.GET
        city = params.get('city')
        language = params.get('language')
        if not (city or language):
            return []
        lookups = {}
        if city:
            lookups['city__slug'] = city
        if language:
            lookups['language__slug'] = language
        return Vacancy.objects.filter(**lookups).select_related('city', 'language')


class VCreate(CreateView):
    """Create a vacancy through ``VForm``; redirects home on success."""
    model = Vacancy
    form_class = VForm
    template_name = 'scraping/create.html'
    success_url = reverse_lazy('home')


class VUpdate(UpdateView):
    """Edit a vacancy through ``VForm``; redirects home on success."""
    model = Vacancy
    form_class = VForm
    template_name = 'scraping/create.html'
    success_url = reverse_lazy('home')


class VDelete(DeleteView):
    """Delete a vacancy and redirect home."""
    model = Vacancy
    success_url = reverse_lazy('home')

    def get(self, request, *args, **kwargs):
        # NOTE(review): GET is forwarded to post(), so the record is deleted
        # without the confirmation page.
        messages.success(request, 'Запись успешно удалена.')
        return self.post(request, *args, **kwargs)
"""Views of the accounts app: login, logout, registration, profile,
account deletion and the contact request."""
import datetime as dt
from django.shortcuts import render, redirect
from django.contrib.auth import authenticate, login, logout, get_user_model
from django.contrib import messages

from accounts.forms import UserLoginForm, UserRegistrationForm, UserUpdateForm, \
    ContactForm
from scraping.models import Error

User = get_user_model()


def login_view(request):
    """Log the user in on a valid form, otherwise re-render the form."""
    form = UserLoginForm(request.POST or None)
    if form.is_valid():
        data = form.cleaned_data
        user = authenticate(request, email=data.get('email'),
                            password=data.get('password'))
        # authenticate() returns None on failure and login() cannot accept
        # None, so only log in a real user.
        if user is not None:
            login(request, user)
            return redirect('home')
    return render(request, 'accounts/login.html', {'form': form})


def logout_view(request):
    """Log the current user out and go to the home page."""
    logout(request)
    return redirect('home')


def register_view(request):
    """Create a user from the registration form and show the done page."""
    form = UserRegistrationForm(request.POST or None)
    if form.is_valid():
        new_user = form.save(commit=False)
        # Store the hash, never the raw password.
        new_user.set_password(form.cleaned_data['password'])
        new_user.save()
        messages.success(request, 'Пользователь добавлен в систему.')
        return render(request, 'accounts/register_done.html',
                      {'new_user': new_user})
    return render(request, 'accounts/register.html', {'form': form})


def update_view(request):
    """Show and save the user's city, language and mailing preference.

    Anonymous users are redirected to the login page.
    """
    if not request.user.is_authenticated:
        return redirect('accounts:login')

    user = request.user
    if request.method == 'POST':
        form = UserUpdateForm(request.POST)
        if form.is_valid():
            data = form.cleaned_data
            user.city = data['city']
            user.language = data['language']
            user.send_email = data['send_email']
            user.save()
            messages.success(request, 'Данные сохраненны.')
            return redirect('accounts:update')
        # Invalid POST: fall through with the bound form so that its
        # validation errors are shown instead of being discarded.
    else:
        form = UserUpdateForm(
            initial={'city': user.city, 'language': user.language,
                     'send_email': user.send_email})
    return render(request, 'accounts/update.html',
                  {'form': form, 'contact_form': ContactForm()})


def delete_view(request):
    """Delete the logged-in user's account on POST; always redirect home."""
    if request.user.is_authenticated and request.method == 'POST':
        User.objects.get(pk=request.user.pk).delete()
        messages.error(request, 'Пользователь удален :(')
    return redirect('home')


def contact(request):
    """Store a user's city/language request in today's ``Error`` record.

    Non-POST requests are redirected to the login page; POST requests are
    redirected to the profile page whether or not the form was valid.
    """
    if request.method != 'POST':
        return redirect('accounts:login')

    contact_form = ContactForm(request.POST or None)
    if contact_form.is_valid():
        data = contact_form.cleaned_data
        entry = {'city': data.get('city'), 'email': data.get('email'),
                 'language': data.get('language')}
        err = Error.objects.filter(timestamp=dt.date.today()).first()
        if err is not None:
            # The scraping script can store a plain string in this field;
            # keep it under ``raw`` and continue with a dict.
            payload = err.data if isinstance(err.data, dict) else {'raw': err.data}
            user_data = payload.get('user_data', [])
            user_data.append(entry)
            payload['user_data'] = user_data
            err.data = payload
            err.save()
        else:
            Error(data={'user_data': [entry]}).save()
        messages.success(request, 'Данные отправлены администрации.')
    return redirect('accounts:update')
Job Finder {% endblock %} 4 | 5 | {% block content %} 6 | 7 | 8 | 9 | 10 | {% if object_list %} 11 |

Результаты поиска согласно Вашего запроса.

12 | {% for obj in object_list %} 13 |
14 |
{{ obj.title }}
15 |
16 |

{{ obj.description }}

17 |

{{ obj.company }}

18 |

{{obj.city.name }} | {{obj.language.name }} | 19 | {{obj.timestamp|date:"d-m-Y" }}

20 | 21 |
22 |
23 | {% endfor %} 24 |
25 |
26 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 |
70 |
71 | {% else %} 72 |

К сожалению, по Вашему запросу, ничего не найдено.

73 | {% endif %} 74 | {{ page_obj }} 75 |
76 |
77 |

Новый поиск

78 |
79 |
80 | {{ form.as_p }} 81 |
82 | 83 |
84 |
85 |
86 | 87 | {% endblock %} 88 | -------------------------------------------------------------------------------- /src/templates/base.html: -------------------------------------------------------------------------------- 1 | {% load static %} 2 | {% url 'accounts:update' as update_url %} 3 | {% url 'accounts:register' as register_url %} 4 | {% url 'accounts:login' as login_url%} 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | {% block title %}My amazing site{% endblock %} 16 | 17 | 18 |
19 | 46 |
47 |
48 |
49 |
50 | {% if messages %} 51 | 52 | {% for message in messages %} 53 | {% if message.level == DEFAULT_MESSAGE_LEVELS.ERROR %} 54 | 60 | {% else %} 61 | 67 | {% endif %} 68 | 69 | 70 | {% endfor %} 71 | 72 | {% endif %} 73 |
74 |
75 |
76 |
77 | {% block content %}{% endblock %} 78 |
79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /src/send_emails.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | import django 3 | import datetime 4 | from django.core.mail import EmailMultiAlternatives 5 | 6 | from django.contrib.auth import get_user_model 7 | 8 | proj = os.path.dirname(os.path.abspath('manage.py')) 9 | sys.path.append(proj) 10 | os.environ["DJANGO_SETTINGS_MODULE"] = "scraping_service.settings" 11 | 12 | django.setup() 13 | from scraping.models import Vacancy, Error, Url 14 | from scraping_service.settings import ( 15 | EMAIL_HOST_USER, 16 | EMAIL_HOST, EMAIL_HOST_PASSWORD 17 | ) 18 | 19 | ADMIN_USER = EMAIL_HOST_USER 20 | 21 | today = datetime.date.today() 22 | subject = f"Рассылка вакансий за {today}" 23 | text_content = f"Рассылка вакансий {today}" 24 | from_email = EMAIL_HOST_USER 25 | empty = '

К сожалению на сегодня по Вашим предпочтениям данных нет.

' 26 | 27 | User = get_user_model() 28 | qs = User.objects.filter(send_email=True).values('city', 'language', 'email') 29 | users_dct = {} 30 | for i in qs: 31 | users_dct.setdefault((i['city'], i['language']), []) 32 | users_dct[(i['city'], i['language'])].append(i['email']) 33 | if users_dct: 34 | params = {'city_id__in': [], 'language_id__in': []} 35 | for pair in users_dct.keys(): 36 | params['city_id__in'].append(pair[0]) 37 | params['language_id__in'].append(pair[1]) 38 | qs = Vacancy.objects.filter(**params, timestamp=today).values() 39 | vacancies = {} 40 | for i in qs: 41 | vacancies.setdefault((i['city_id'], i['language_id']), []) 42 | vacancies[(i['city_id'], i['language_id'])].append(i) 43 | for keys, emails in users_dct.items(): 44 | rows = vacancies.get(keys, []) 45 | html = '' 46 | for row in rows: 47 | html += f'{ row["title"] }' 48 | html += f'

{row["description"]}

' 49 | html += f'

{row["company"]}



' 50 | _html = html if html else empty 51 | for email in emails: 52 | to = email 53 | msg = EmailMultiAlternatives( 54 | subject, text_content, from_email, [to] 55 | ) 56 | msg.attach_alternative(_html, "text/html") 57 | msg.send() 58 | 59 | qs = Error.objects.filter(timestamp=today) 60 | subject = '' 61 | text_content = '' 62 | to = ADMIN_USER 63 | _html = '' 64 | if qs.exists(): 65 | error = qs.first() 66 | data = error.data.get('errors', []) 67 | for i in data: 68 | _html += f'Error: { i["title"] }


' 69 | subject += f"Ошибки скрапинга {today}" 70 | text_content += "Ошибки скрапинга" 71 | data = error.data.get('user_data') 72 | if data: 73 | _html += '
' 74 | _html += '

Пожелания пользователей

' 75 | for i in data: 76 | _html += f'Город: {i["city"]}, Специальность:{i["language"]}, Имейл:{i["email"]}


' 77 | subject += f" Пожелания пользователей {today}" 78 | text_content += "Пожелания пользователей" 79 | 80 | qs = Url.objects.all().values('city', 'language') 81 | urls_dct = {(i['city'], i['language']): True for i in qs} 82 | urls_err = '' 83 | for keys in users_dct.keys(): 84 | if keys not in urls_dct: 85 | if keys[0] and keys[1]: 86 | urls_err += f' Для города: {keys[0]} и ЯП: {keys[1]} отсутствуют урлы


' 87 | if urls_err: 88 | subject += ' Отсутствующие урлы ' 89 | _html += '
' 90 | _html += '

Отсутствующие урлы

' 91 | _html += urls_err 92 | 93 | if subject: 94 | msg = EmailMultiAlternatives(subject, text_content, from_email, [to]) 95 | msg.attach_alternative(_html, "text/html") 96 | msg.send() 97 | 98 | # import smtplib 99 | # from email.mime.multipart import MIMEMultipart 100 | # from email.mime.text import MIMEText 101 | # 102 | # msg = MIMEMultipart('alternative') 103 | # msg['Subject'] = 'Список вакансий за {}'.format(today) 104 | # msg['From'] = EMAIL_HOST_USER 105 | # mail = smtplib.SMTP() 106 | # mail.connect(EMAIL_HOST, 25) 107 | # mail.ehlo() 108 | # mail.starttls() 109 | # mail.login(EMAIL_HOST_USER, EMAIL_HOST_PASSWORD) 110 | # 111 | # html_m = "

Hello world

" 112 | # part = MIMEText(html_m, 'html') 113 | # msg.attach(part) 114 | # mail.sendmail(EMAIL_HOST_USER, [to], msg.as_string()) 115 | # mail.quit() 116 | -------------------------------------------------------------------------------- /src/scraping_service/settings/production.py: -------------------------------------------------------------------------------- 1 | """ 2 | Django settings for scraping_service project. 3 | 4 | Generated by 'django-admin startproject' using Django 3.0.3. 5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/3.0/topics/settings/ 8 | 9 | For the full list of settings and their values, see 10 | https://docs.djangoproject.com/en/3.0/ref/settings/ 11 | """ 12 | 13 | import os 14 | import dj_database_url 15 | 16 | EMAIL_HOST_USER = os.getenv("EMAIL_HOST_USER") 17 | EMAIL_HOST_PASSWORD = os.getenv("EMAIL_HOST_PASSWORD") 18 | EMAIL_HOST = os.getenv("EMAIL_HOST") 19 | EMAIL_PORT = os.getenv("EMAIL_PORT") 20 | DB_NAME = os.environ.get('DB_NAME') 21 | DB_PASSWORD = os.environ.get('DB_PASSWORD') 22 | DB_HOST = os.environ.get('DB_HOST') 23 | DB_USER = os.environ.get('DB_USER') 24 | SECRET_KEY = os.environ.get('SECRET_KEY') 25 | # Build paths inside the project like this: os.path.join(BASE_DIR, ...) 26 | BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 27 | 28 | 29 | # Quick-start development settings - unsuitable for production 30 | # See https://docs.djangoproject.com/en/3.0/howto/deployment/checklist/ 31 | 32 | # SECURITY WARNING: keep the secret key used in production secret! 33 | # SECRET_KEY = 'l9*06s&fzt(&z(np)2gx6=-rvl=@s(cnw!@5&i#iw^s!ayw@+r' 34 | SECRET_KEY = SECRET_KEY 35 | 36 | # SECURITY WARNING: don't run with debug turned on in production! 
37 | DEBUG = False 38 | 39 | ALLOWED_HOSTS = ["serv-scrap.herokuapp.com"] 40 | 41 | 42 | # Application definition 43 | 44 | INSTALLED_APPS = [ 45 | 'django.contrib.admin', 46 | 'django.contrib.auth', 47 | 'django.contrib.contenttypes', 48 | 'django.contrib.sessions', 49 | 'django.contrib.messages', 50 | 'django.contrib.staticfiles', 51 | 'scraping', 52 | 'accounts', 53 | ] 54 | 55 | MIDDLEWARE = [ 56 | 'django.middleware.security.SecurityMiddleware', 57 | 'whitenoise.middleware.WhiteNoiseMiddleware', 58 | 'django.contrib.sessions.middleware.SessionMiddleware', 59 | 'django.middleware.common.CommonMiddleware', 60 | 'django.middleware.csrf.CsrfViewMiddleware', 61 | 'django.contrib.auth.middleware.AuthenticationMiddleware', 62 | 'django.contrib.messages.middleware.MessageMiddleware', 63 | 'django.middleware.clickjacking.XFrameOptionsMiddleware', 64 | ] 65 | 66 | ROOT_URLCONF = 'scraping_service.urls' 67 | 68 | TEMPLATES = [ 69 | { 70 | 'BACKEND': 'django.template.backends.django.DjangoTemplates', 71 | 'DIRS': [os.path.join(BASE_DIR, 'templates')], 72 | 'APP_DIRS': True, 73 | 'OPTIONS': { 74 | 'context_processors': [ 75 | 'django.template.context_processors.debug', 76 | 'django.template.context_processors.request', 77 | 'django.contrib.auth.context_processors.auth', 78 | 'django.contrib.messages.context_processors.messages', 79 | ], 80 | }, 81 | }, 82 | ] 83 | 84 | WSGI_APPLICATION = 'scraping_service.wsgi.application' 85 | 86 | 87 | # Database 88 | # https://docs.djangoproject.com/en/3.0/ref/settings/#databases 89 | 90 | DATABASES = { 91 | 'default': { 92 | 'ENGINE': 'django.db.backends.postgresql', 93 | 'NAME': DB_NAME, 94 | 'USER': DB_USER, 95 | 'PASSWORD': DB_PASSWORD, 96 | 'HOST': DB_HOST, 97 | 'PORT': '5432', 98 | } 99 | } 100 | 101 | db = dj_database_url.config() 102 | DATABASES['default'].update(db) 103 | 104 | # Password validation 105 | # https://docs.djangoproject.com/en/3.0/ref/settings/#auth-password-validators 106 | 107 | AUTH_PASSWORD_VALIDATORS = [ 108 
| { 109 | 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', 110 | }, 111 | { 112 | 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', 113 | }, 114 | { 115 | 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', 116 | }, 117 | { 118 | 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', 119 | }, 120 | ] 121 | 122 | 123 | # Internationalization 124 | # https://docs.djangoproject.com/en/3.0/topics/i18n/ 125 | 126 | LANGUAGE_CODE = 'ru-ru' 127 | 128 | TIME_ZONE = 'UTC' 129 | 130 | USE_I18N = True 131 | 132 | USE_L10N = True 133 | 134 | USE_TZ = True 135 | 136 | 137 | # Static files (CSS, JavaScript, Images) 138 | # https://docs.djangoproject.com/en/3.0/howto/static-files/ 139 | 140 | STATIC_URL = '/static/' 141 | 142 | AUTH_USER_MODEL = 'accounts.MyUser' 143 | 144 | EMAIL_BACKEND = 'django.core.mail.backends.smtp.EmailBackend' 145 | EMAIL_HOST = EMAIL_HOST 146 | EMAIL_USE_TLS = True 147 | EMAIL_PORT = EMAIL_PORT 148 | EMAIL_HOST_USER = EMAIL_HOST_USER 149 | EMAIL_HOST_PASSWORD = EMAIL_HOST_PASSWORD 150 | 151 | STATICFILES_DIRS = (os.path.join(BASE_DIR, 'static'),) 152 | STATIC_ROOT = os.path.join(BASE_DIR, 'staticfiles') 153 | -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/pycharm,python,django 3 | # Edit at https://www.gitignore.io/?templates=pycharm,python,django 4 | 5 | ### Django ### 6 | *.log 7 | *.pot 8 | *.pyc 9 | __pycache__/ 10 | local_settings.py 11 | db.sqlite3 12 | db.sqlite3-journal 13 | media 14 | 15 | .env 16 | # If your build process includes running collectstatic, then you probably don't need or want to include staticfiles/ 17 | # in your Git repository. Update and uncomment the following line accordingly. 
18 | # /staticfiles/ 19 | 20 | ### Django.Python Stack ### 21 | # Byte-compiled / optimized / DLL files 22 | *.py[cod] 23 | *$py.class 24 | 25 | # C extensions 26 | *.so 27 | 28 | # Distribution / packaging 29 | .Python 30 | build/ 31 | develop-eggs/ 32 | dist/ 33 | downloads/ 34 | eggs/ 35 | .eggs/ 36 | lib/ 37 | lib64/ 38 | parts/ 39 | sdist/ 40 | var/ 41 | wheels/ 42 | pip-wheel-metadata/ 43 | share/python-wheels/ 44 | *.egg-info/ 45 | .installed.cfg 46 | *.egg 47 | MANIFEST 48 | 49 | # PyInstaller 50 | # Usually these files are written by a python script from a template 51 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 52 | *.manifest 53 | *.spec 54 | 55 | # Installer logs 56 | pip-log.txt 57 | pip-delete-this-directory.txt 58 | 59 | # Unit test / coverage reports 60 | htmlcov/ 61 | .tox/ 62 | .nox/ 63 | .coverage 64 | .coverage.* 65 | .cache 66 | nosetests.xml 67 | coverage.xml 68 | *.cover 69 | .hypothesis/ 70 | .pytest_cache/ 71 | 72 | # Translations 73 | *.mo 74 | 75 | # Scrapy stuff: 76 | .scrapy 77 | 78 | # Sphinx documentation 79 | docs/_build/ 80 | 81 | # PyBuilder 82 | target/ 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 
92 | #Pipfile.lock 93 | 94 | # celery beat schedule file 95 | celerybeat-schedule 96 | 97 | # SageMath parsed files 98 | *.sage.py 99 | 100 | # Spyder project settings 101 | .spyderproject 102 | .spyproject 103 | 104 | # Rope project settings 105 | .ropeproject 106 | 107 | # Mr Developer 108 | .mr.developer.cfg 109 | .project 110 | .pydevproject 111 | 112 | # mkdocs documentation 113 | /site 114 | 115 | # mypy 116 | .mypy_cache/ 117 | .dmypy.json 118 | dmypy.json 119 | 120 | # Pyre type checker 121 | .pyre/ 122 | 123 | ### PyCharm ### 124 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 125 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 126 | 127 | # User-specific stuff 128 | .idea/**/workspace.xml 129 | .idea/**/tasks.xml 130 | .idea/**/usage.statistics.xml 131 | .idea/**/dictionaries 132 | .idea/**/shelf 133 | 134 | # Generated files 135 | .idea/**/contentModel.xml 136 | 137 | # Sensitive or high-churn files 138 | .idea/**/dataSources/ 139 | .idea/**/dataSources.ids 140 | .idea/**/dataSources.local.xml 141 | .idea/**/sqlDataSources.xml 142 | .idea/**/dynamic.xml 143 | .idea/**/uiDesigner.xml 144 | .idea/**/dbnavigator.xml 145 | 146 | # Gradle 147 | .idea/**/gradle.xml 148 | .idea/**/libraries 149 | 150 | # Gradle and Maven with auto-import 151 | # When using Gradle or Maven with auto-import, you should exclude module files, 152 | # since they will be recreated, and may cause churn. Uncomment if using 153 | # auto-import. 
154 | # .idea/modules.xml 155 | # .idea/*.iml 156 | # .idea/modules 157 | # *.iml 158 | # *.ipr 159 | 160 | # CMake 161 | cmake-build-*/ 162 | 163 | # Mongo Explorer plugin 164 | .idea/**/mongoSettings.xml 165 | 166 | # File-based project format 167 | *.iws 168 | 169 | # IntelliJ 170 | out/ 171 | 172 | # mpeltonen/sbt-idea plugin 173 | .idea_modules/ 174 | 175 | # JIRA plugin 176 | atlassian-ide-plugin.xml 177 | 178 | # Cursive Clojure plugin 179 | .idea/replstate.xml 180 | 181 | # Crashlytics plugin (for Android Studio and IntelliJ) 182 | com_crashlytics_export_strings.xml 183 | crashlytics.properties 184 | crashlytics-build.properties 185 | fabric.properties 186 | 187 | # Editor-based Rest Client 188 | .idea/httpRequests 189 | 190 | # Android studio 3.1+ serialized cache file 191 | .idea/caches/build_file_checksums.ser 192 | 193 | ### PyCharm Patch ### 194 | # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 195 | 196 | # *.iml 197 | # modules.xml 198 | # .idea/misc.xml 199 | # *.ipr 200 | 201 | # Sonarlint plugin 202 | .idea/**/sonarlint/ 203 | 204 | # SonarQube Plugin 205 | .idea/**/sonarIssues.xml 206 | 207 | # Markdown Navigator plugin 208 | .idea/**/markdown-navigator.xml 209 | .idea/**/markdown-navigator/ 210 | 211 | ### Python ### 212 | # Byte-compiled / optimized / DLL files 213 | 214 | # C extensions 215 | 216 | # Distribution / packaging 217 | 218 | # PyInstaller 219 | # Usually these files are written by a python script from a template 220 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 221 | 222 | # Installer logs 223 | 224 | # Unit test / coverage reports 225 | 226 | # Translations 227 | 228 | # Scrapy stuff: 229 | 230 | # Sphinx documentation 231 | 232 | # PyBuilder 233 | 234 | # pyenv 235 | 236 | # pipenv 237 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
"""Scrapers for job boards.

Every public parser takes a listing-page URL and returns a ``(jobs, errors)``
tuple: ``jobs`` is a list of vacancy dicts, ``errors`` is a list of
``{'url': ..., 'title': ...}`` dicts describing why a page could not be parsed.
"""
import requests
import codecs
from bs4 import BeautifulSoup as BS
from random import randint

__all__ = ('work', "rabota", 'dou', 'djinni')

headers = [
    {'User-Agent': 'Mozilla/5.0 (Windows NT 5.1; rv:47.0) Gecko/20100101 Firefox/47.0',
     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'},
    {'User-Agent': 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36',
     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'},
    {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:53.0) Gecko/20100101 Firefox/53.0',
     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'}
]

# Seconds to wait for a site before giving up; without a timeout a single
# unresponsive host would block the caller indefinitely.
REQUEST_TIMEOUT = 10


def _fetch_soup(url):
    """Download *url* and return it parsed, or ``None`` if it is unavailable.

    ``None`` is returned both for network-level failures (connection error,
    timeout, malformed URL) and for any HTTP status other than 200.
    """
    # Pick one of the configured header sets at random; index by the list
    # length so adding or removing an entry cannot cause an IndexError.
    request_headers = headers[randint(0, len(headers) - 1)]
    try:
        resp = requests.get(url, headers=request_headers,
                            timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        return None
    if resp.status_code != 200:
        return None
    return BS(resp.content, 'html.parser')


def _link_href(tag):
    """Return the ``href`` of the first ``<a>`` inside *tag*, or ``None``."""
    if tag is None:
        return None
    link = tag.a
    if link is None:
        return None
    return link.get('href')


def _text_or_empty(tag):
    """Return ``tag.text``, or an empty string when *tag* is ``None``."""
    return tag.text if tag is not None else ''


def work(url, city=None, language=None):
    """Parse a work.ua listing page.

    :param url: listing page URL; a falsy value yields two empty lists.
    :param city: value stored under ``city_id`` in every job dict.
    :param language: value stored under ``language_id`` in every job dict.
    :return: ``(jobs, errors)`` tuple of lists of dicts.
    """
    jobs = []
    errors = []
    domain = 'https://www.work.ua'
    if not url:
        return jobs, errors

    soup = _fetch_soup(url)
    if soup is None:
        errors.append({'url': url, 'title': "Page do not response"})
        return jobs, errors

    main_div = soup.find('div', id='pjax-job-list')
    if not main_div:
        errors.append({'url': url, 'title': "Div does not exists"})
        return jobs, errors

    for div in main_div.find_all('div', attrs={'class': 'job-link'}):
        title = div.find('h2')
        href = _link_href(title)
        if href is None:
            # A card without a title link cannot produce a usable vacancy
            # URL; skip it instead of aborting the whole page.
            continue
        company = 'No name'
        logo = div.find('img')
        if logo:
            company = logo.get('alt', company)
        jobs.append({'title': title.text, 'url': domain + href,
                     'description': _text_or_empty(div.p),
                     'company': company,
                     'city_id': city, 'language_id': language})

    return jobs, errors


def rabota(url, city=None, language=None):
    """Parse a rabota.ua listing page.

    :param url: listing page URL; a falsy value yields two empty lists.
    :param city: value stored under ``city_id`` in every job dict.
    :param language: value stored under ``language_id`` in every job dict.
    :return: ``(jobs, errors)`` tuple of lists of dicts.
    """
    jobs = []
    errors = []
    domain = 'https://rabota.ua'
    if not url:
        return jobs, errors

    soup = _fetch_soup(url)
    if soup is None:
        errors.append({'url': url, 'title': "Page do not response"})
        return jobs, errors

    # The presence of this div is treated as "nothing to parse".
    new_jobs = soup.find('div', attrs={'class': 'f-vacancylist-newnotfound'})
    if new_jobs:
        errors.append({'url': url, 'title': "Page is empty"})
        return jobs, errors

    table = soup.find('table', id='ctl00_content_vacancyList_gridList')
    if not table:
        errors.append({'url': url, 'title': "Table does not exists"})
        return jobs, errors

    for tr in table.find_all('tr', attrs={'id': True}):
        div = tr.find('div', attrs={'class': 'card-body'})
        if not div:
            continue
        title = div.find('p', attrs={'class': 'card-title'})
        href = _link_href(title)
        if href is None:
            # No title link means no vacancy URL; skip the malformed row.
            continue
        company = 'No name'
        p = div.find('p', attrs={'class': 'company-name'})
        if p and p.a:
            company = p.a.text
        jobs.append({
            'title': title.text,
            'url': domain + href,
            'description': _text_or_empty(div.p),
            'company': company,
            'city_id': city, 'language_id': language})

    return jobs, errors


def dou(url, city=None, language=None):
    """Parse a DOU vacancies listing page.

    Unlike the other parsers the ``href`` found on the page is stored as-is,
    without a domain prefix.

    :param url: listing page URL; a falsy value yields two empty lists.
    :param city: value stored under ``city_id`` in every job dict.
    :param language: value stored under ``language_id`` in every job dict.
    :return: ``(jobs, errors)`` tuple of lists of dicts.
    """
    jobs = []
    errors = []
    if not url:
        return jobs, errors

    soup = _fetch_soup(url)
    if soup is None:
        errors.append({'url': url, 'title': "Page do not response"})
        return jobs, errors

    main_div = soup.find('div', id='vacancyListId')
    if not main_div:
        errors.append({'url': url, 'title': "Div does not exists"})
        return jobs, errors

    for li in main_div.find_all('li', attrs={'class': 'l-vacancy'}):
        title = li.find('div', attrs={'class': 'title'})
        href = _link_href(title)
        if href is None:
            # No title link means no vacancy URL; skip the malformed item.
            continue
        cont = li.find('div', attrs={'class': 'sh-info'})
        company = 'No name'
        a = title.find('a', attrs={'class': 'company'})
        if a:
            company = a.text
        jobs.append({'title': title.text, 'url': href,
                     'description': _text_or_empty(cont),
                     'company': company,
                     'city_id': city, 'language_id': language})

    return jobs, errors


def djinni(url, city=None, language=None):
    """Parse a djinni.co jobs listing page.

    :param url: listing page URL; a falsy value yields two empty lists.
    :param city: value stored under ``city_id`` in every job dict.
    :param language: value stored under ``language_id`` in every job dict.
    :return: ``(jobs, errors)`` tuple of lists of dicts.
    """
    jobs = []
    errors = []
    domain = 'https://djinni.co'
    if not url:
        return jobs, errors

    soup = _fetch_soup(url)
    if soup is None:
        errors.append({'url': url, 'title': "Page do not response"})
        return jobs, errors

    main_ul = soup.find('ul', attrs={'class': 'list-jobs'})
    if not main_ul:
        errors.append({'url': url, 'title': "Div does not exists"})
        return jobs, errors

    for li in main_ul.find_all('li', attrs={'class': 'list-jobs__item'}):
        title = li.find('div', attrs={'class': 'list-jobs__title'})
        href = _link_href(title)
        if href is None:
            # No title link means no vacancy URL; skip the malformed item.
            continue
        cont = li.find('div', attrs={'class': 'list-jobs__description'})
        company = 'No name'
        comp = li.find('div', attrs={'class': 'list-jobs__details__info'})
        if comp:
            company = comp.text
        jobs.append({'title': title.text, 'url': domain + href,
                     'description': _text_or_empty(cont),
                     'company': company,
                     'city_id': city, 'language_id': language})

    return jobs, errors


if __name__ == '__main__':
    # Manual smoke run: scrape one djinni page and dump the result to a file.
    url = 'https://djinni.co/jobs/?location=%D0%9A%D0%B8%D0%B5%D0%B2&primary_keyword=Python'
    jobs, errors = djinni(url)
    # The context manager closes the file even if the write raises.
    with codecs.open('work.txt', 'w', 'utf-8') as h:
        h.write(str(jobs))