├── cli ├── __init__.py ├── test_cli.py └── awesome_search.py ├── searchapp ├── config │ ├── __init__.py │ ├── views.py │ ├── asgi.py │ ├── wsgi.py │ ├── tests.py │ ├── keys.py │ ├── urls.py │ └── settings.py ├── search │ ├── __init__.py │ ├── apps.py │ ├── urls.py │ ├── formatter.py │ ├── views.py │ └── tests.py ├── users │ ├── __init__.py │ ├── apps.py │ ├── urls.py │ ├── serializers.py │ ├── views.py │ └── tests.py ├── indexer │ ├── __init__.py │ ├── tests.py │ ├── scrapers.py │ └── index.py ├── config.ini.sample ├── requirements.txt ├── main.py ├── app.yaml ├── .gcloudignore └── manage.py ├── assets ├── demo.png ├── diagram.png └── list_of_lists.txt ├── pyproject.toml ├── images └── app_preview_image.png ├── .gitignore ├── setup.py ├── LICENSE ├── marketplace.json ├── raycast └── awesome_search.py └── README.md /cli/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /searchapp/config/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /searchapp/search/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /searchapp/users/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /searchapp/indexer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assets/demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/redis-developer/awesome-search/master/assets/demo.png -------------------------------------------------------------------------------- /assets/diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/redis-developer/awesome-search/master/assets/diagram.png -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | -------------------------------------------------------------------------------- /images/app_preview_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/redis-developer/awesome-search/master/images/app_preview_image.png -------------------------------------------------------------------------------- /searchapp/users/apps.py: -------------------------------------------------------------------------------- 1 | from django.apps import AppConfig 2 | 3 | 4 | class UsersConfig(AppConfig): 5 | name = 'users' 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | venv 2 | __pycache__ 3 | .DS_Store 4 | db.sqlite3 5 | db.my 6 | config.ini 7 | dist 8 | *.egg-info/ 9 | build 10 | -------------------------------------------------------------------------------- /searchapp/search/apps.py: -------------------------------------------------------------------------------- 1 | from django.apps import AppConfig 2 | 3 | 4 | class SearchConfig(AppConfig): 5 | default_auto_field = 'django.db.models.BigAutoField' 6 | name = 'search' 7 | -------------------------------------------------------------------------------- /searchapp/config/views.py: -------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | from django.urls import path, include 3 | 4 | urlpatterns = [ 5 | path('admin/', admin.site.urls), 6 | path('', include('rest_framework.urls')) 7 | ] 8 | -------------------------------------------------------------------------------- /searchapp/config.ini.sample: -------------------------------------------------------------------------------- 1 | [redis] 2 | PORT=6379 3 | HOST=127.0.0.1 4 | PASSWORD= 5 | 6 | [github] 7 | ACCESS_TOKEN= 8 | 9 | [prod] 10 | SECRET_KEY= 11 | MYSQL_NAME=awesome-search-base 12 | MYSQL_USER=root 13 | MYSQL_CONNECTION_NAME= 14 | MYSQL_PASS= 15 | -------------------------------------------------------------------------------- /searchapp/requirements.txt: -------------------------------------------------------------------------------- 1 | Django==3.2 2 | redisearch==2.0.0 3 | tweepy==3.10.0 4 | json-config-parser==0.1.2 5 | djangorestframework==3.12.4 6 | django-cors-headers==3.7.0 7 | drf-nested-routers==0.93.3 8 | django-redis==4.12.1 9 | django-rest-knox 10 | PyMySQL==1.0.2 11 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name='awesome-search', 5 | version='0.1.3', 6 | packages=['cli'], 7 | entry_points=''' 8 | [console_scripts] 9 | awesome=cli.awesome_search:main 10 | ''', 11 | install_requires=[] 12 | ) 13 | -------------------------------------------------------------------------------- /searchapp/search/urls.py: -------------------------------------------------------------------------------- 1 | from django.urls import path 2 | 3 | from .views import general_search, languages, awesome_lists 4 | 5 | urlpatterns = [ 6 | path('', general_search, name="general-search"), 7 | path('languages', languages, name="search-languages"), 8 | path('awesome-lists', awesome_lists, name="search-lists") 9 | ] 10 | -------------------------------------------------------------------------------- /searchapp/config/asgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | ASGI config for searchapp project. 3 | 4 | It exposes the ASGI callable as a module-level variable named ``application``. 5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/3.2/howto/deployment/asgi/ 8 | """ 9 | 10 | import os 11 | 12 | from django.core.asgi import get_asgi_application 13 | 14 | os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings') 15 | 16 | application = get_asgi_application() 17 | -------------------------------------------------------------------------------- /searchapp/config/wsgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | WSGI config for searchapp project. 3 | 4 | It exposes the WSGI callable as a module-level variable named ``application``. 5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/3.2/howto/deployment/wsgi/ 8 | """ 9 | 10 | import os 11 | 12 | from django.core.wsgi import get_wsgi_application 13 | 14 | os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings') 15 | 16 | application = get_wsgi_application() 17 | -------------------------------------------------------------------------------- /searchapp/users/urls.py: -------------------------------------------------------------------------------- 1 | from django.urls import path, include 2 | from .views import RegisterAPI, LoginAPI, UserAPI 3 | from knox import views as knox_views 4 | 5 | urlpatterns = [ 6 | path('api/auth', include('knox.urls')), 7 | path('api/auth/register', RegisterAPI.as_view(), name='register'), 8 | path('api/auth/login', LoginAPI.as_view(), name='login'), 9 | path('api/auth/user', UserAPI.as_view()), 10 | path('api/auth/logout', knox_views.LogoutView.as_view(), name='knox_logout') 11 | ] 12 | -------------------------------------------------------------------------------- /searchapp/main.py: -------------------------------------------------------------------------------- 1 | from config.wsgi import application 2 | 3 | # App Engine by default looks for a main.py file at the root of the app 4 | # directory with a WSGI-compatible object called app. 5 | # This file imports the WSGI-compatible object of your Django app, 6 | # application from mysite/wsgi.py and renames it app so it is discoverable by 7 | # App Engine without additional configuration. 8 | # Alternatively, you can add a custom entrypoint field in your app.yaml: 9 | # entrypoint: gunicorn -b :$PORT mysite.wsgi 10 | app = application 11 | -------------------------------------------------------------------------------- /searchapp/app.yaml: -------------------------------------------------------------------------------- 1 | # [START django_app] 2 | runtime: python39 3 | 4 | service: awesome-search 5 | 6 | handlers: 7 | # # This configures Google App Engine to serve the files in the app's static 8 | # # directory. 9 | # - url: /static 10 | # static_dir: static/ 11 | 12 | # This handler routes all requests not caught above to your main app. It is 13 | # required when static routes are defined, but can be omitted (along with 14 | # the entire handlers section) when there are no static files defined. 15 | - url: /.* 16 | script: auto 17 | 18 | # [END django_app] 19 | -------------------------------------------------------------------------------- /searchapp/config/tests.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from .keys import Keys 4 | 5 | 6 | class KeyFormattingTests(TestCase): 7 | def test_repo(self): 8 | keys = Keys("pre") 9 | self.assertEqual(keys.github_repo("a", "b"), "pre:resource:github:a:b") 10 | self.assertEqual(keys.github_repo_lists("a", "b"), 11 | "pre:resource:github:a:b:lists") 12 | 13 | def test_lists(self): 14 | keys = Keys("pre2") 15 | self.assertEqual(keys.language_list(), "pre2:resource:data:languages") 16 | self.assertEqual(keys.awesome_list_list(), 17 | "pre2:resource:data:awesome_lists") 18 | -------------------------------------------------------------------------------- /searchapp/.gcloudignore: -------------------------------------------------------------------------------- 1 | # This file specifies files that are *not* uploaded to Google Cloud Platform 2 | # using gcloud. It follows the same syntax as .gitignore, with the addition of 3 | # "#!include" directives (which insert the entries of the given .gitignore-style 4 | # file at that point). 5 | # 6 | # For more information, run: 7 | # $ gcloud topic gcloudignore 8 | # 9 | .gcloudignore 10 | # If you would like to upload your .git directory, .gitignore file or files 11 | # from your .gitignore file, remove the corresponding line 12 | # below: 13 | .git 14 | .gitignore 15 | 16 | # Python pycache: 17 | __pycache__/ 18 | # Ignored by the build system 19 | /setup.cfg 20 | 21 | venv 22 | -------------------------------------------------------------------------------- /searchapp/manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Django's command-line utility for administrative tasks.""" 3 | import os 4 | import sys 5 | 6 | 7 | def main(): 8 | """Run administrative tasks.""" 9 | os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings') 10 | try: 11 | from django.core.management import execute_from_command_line 12 | except ImportError as exc: 13 | raise ImportError( 14 | "Couldn't import Django. Are you sure it's installed and " 15 | "available on your PYTHONPATH environment variable? Did you " 16 | "forget to activate a virtual environment?" 17 | ) from exc 18 | execute_from_command_line(sys.argv) 19 | 20 | 21 | if __name__ == '__main__': 22 | main() 23 | -------------------------------------------------------------------------------- /searchapp/config/keys.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class Keys: 4 | """ 5 | Centralize Redis keys used throughout app. 6 | """ 7 | 8 | def __init__(self, prefix: str): 9 | # Prefix for all redis keys in app. 10 | self.prefix = prefix 11 | 12 | def pre(self, s: str) -> str: 13 | return f"{self.prefix}:{s}" 14 | 15 | def github_repo(self, owner: str, repo_name: str) -> str: 16 | return f"{self.prefix}:resource:github:{owner}:{repo_name}" 17 | 18 | def github_repo_lists(self, owner: str, repo_name: str) -> str: 19 | return f"{self.prefix}:resource:github:{owner}:{repo_name}:lists" 20 | 21 | def language_list(self) -> str: 22 | return f"{self.prefix}:resource:data:languages" 23 | 24 | def awesome_list_list(self) -> str: 25 | return f"{self.prefix}:resource:data:awesome_lists" 26 | -------------------------------------------------------------------------------- /searchapp/indexer/tests.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from indexer.index import Indexer 4 | from indexer.scrapers import AwesomeScrape 5 | 6 | 7 | class AwesomeScrapeTests(TestCase): 8 | def test_awesome_python(self): 9 | scraper = AwesomeScrape( 10 | "https://github.com/vinta/awesome-python/README/#") 11 | repos = scraper.scrape(1) 12 | for repo in repos: 13 | self.assertIsNotNone(repo['name']) 14 | self.assertIsNotNone(repo['full_name']) 15 | self.assertIsNotNone(repo['description']) 16 | 17 | 18 | class AwesomeIndexerTests(TestCase): 19 | def test_awesome_indexer(self): 20 | indexer = Indexer([ 21 | "https://github.com/vinta/awesome-python", 22 | "https://github.com/JamzyWang/awesome-redis" 23 | ], max_per_list=2) 24 | indexer.create_index_definition() 25 | indexer.index() 26 | 27 | -------------------------------------------------------------------------------- /searchapp/config/urls.py: -------------------------------------------------------------------------------- 1 | """searchapp URL Configuration 2 | 3 | The `urlpatterns` list routes URLs to views. For more information please see: 4 | https://docs.djangoproject.com/en/3.2/topics/http/urls/ 5 | Examples: 6 | Function views 7 | 1. Add an import: from my_app import views 8 | 2. Add a URL to urlpatterns: path('', views.home, name='home') 9 | Class-based views 10 | 1. Add an import: from other_app.views import Home 11 | 2. Add a URL to urlpatterns: path('', Home.as_view(), name='home') 12 | Including another URLconf 13 | 1. Import the include() function: from django.urls import include, path 14 | 2. Add a URL to urlpatterns: path('blog/', include('blog.urls')) 15 | """ 16 | from django.contrib import admin 17 | from django.urls import include, path 18 | 19 | urlpatterns = [ 20 | path('admin/', admin.site.urls), 21 | path('search/', include('search.urls'), name='search'), 22 | path('users/', include('users.urls'), name='users') 23 | ] 24 | -------------------------------------------------------------------------------- /searchapp/search/formatter.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | UNSAFE_CHARS = re.compile('[\\[\\]\\<\\>+]') 4 | 5 | 6 | def parse(query: str) -> str: 7 | """ 8 | Remove unsafe characters 9 | https://github.com/redislabs-training/redis-sitesearch/blob/master/sitesearch/query_parser.py 10 | """ 11 | query = query.strip().replace("-*", "*") 12 | query = UNSAFE_CHARS.sub(' ', query) 13 | query = query.strip() 14 | return query 15 | 16 | 17 | def format_query(query, resources, languages, awesome_lists) -> str: 18 | text_query = parse(query) 19 | 20 | components = [] 21 | if len(resources) > 0: 22 | sources = "|".join(resources) 23 | components.append(f"@source:({sources})") 24 | if len(languages) > 0: 25 | languages = "|".join(languages) 26 | components.append(f"@language:({languages})") 27 | if len(awesome_lists) > 0: 28 | awesome_lists = " ".join(awesome_lists) 29 | components.append(f"@lists:({awesome_lists})") 30 | 31 | components = " ".join(components) 32 | query = f"{components} {text_query}" 33 | return query 34 | -------------------------------------------------------------------------------- /searchapp/users/serializers.py: -------------------------------------------------------------------------------- 1 | from rest_framework import serializers 2 | from django.contrib.auth.models import User 3 | from django.contrib.auth import authenticate 4 | 5 | 6 | class UserSerializer(serializers.ModelSerializer): 7 | class Meta: 8 | model = User 9 | fields = ('id', 'username', 'email') 10 | 11 | 12 | class RegisterSerializer(serializers.ModelSerializer): 13 | class Meta: 14 | model = User 15 | fields = ('id', 'username', 'email', 'password') 16 | extra_kwargs = {'password': {'write_only': True}} 17 | 18 | def create(self, validated_data): 19 | user = User.objects.create_user( 20 | validated_data['username'], validated_data['email'], validated_data['password']) 21 | 22 | return user 23 | 24 | 25 | class LoginSerializer(serializers.Serializer): 26 | username = serializers.CharField() 27 | password = serializers.CharField() 28 | 29 | def validate(self, data): 30 | user = authenticate(**data) 31 | if user and user.is_active: 32 | return user 33 | raise serializers.ValidationError("Incorrect Credentials") 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Redis Developer 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /cli/test_cli.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from .awesome_search import format_error, format_results, parse_list, parser 4 | 5 | 6 | class CLITests(TestCase): 7 | def test_parse_list(self): 8 | self.assertEqual(parse_list("python, ruby django |* redis"), 9 | ["python", "ruby", "django", "redis"]) 10 | 11 | def test_format_error(self): 12 | args = parser.parse_args( 13 | ['blah', '-l', 'python,ruby', '--lists', 'ruby,django']) 14 | result = "No results found for: \"blah\" | Written in: \"python,ruby\" | Featured on: \"ruby,django\" lists" 15 | self.assertEqual(format_error(args), result) 16 | 17 | def test_format_results(self): 18 | args = parser.parse_args(['redis', '-r', '1']) 19 | docs = [{'id': 'awesome:resource:github:andymccurdy:redis-py', 'payload': None, 'repo_name': 'redis-py', 'lists': 'awesome-redis, awesome-python', 20 | 'body': 'Redis Python Client', 'stargazers_count': '9277', 'language': 'Python', 'svn_url': 'https://github.com/andymccurdy/redis-py'}] 21 | result = "\x1b[92mredis-py\x1b[0m - Redis Python Client\nStars \x1b[93m9277\x1b[0m https://github.com/andymccurdy/redis-py\n\n" 22 | self.assertEqual(format_results(docs, args.results), result) 23 | -------------------------------------------------------------------------------- /searchapp/users/views.py: -------------------------------------------------------------------------------- 1 | from rest_framework import generics, permissions 2 | from rest_framework.response import Response 3 | from knox.models import AuthToken 4 | from .serializers import UserSerializer, RegisterSerializer, LoginSerializer 5 | 6 | # Register API 7 | class RegisterAPI(generics.GenericAPIView): 8 | serializer_class = RegisterSerializer 9 | 10 | def post(self, request, *args, **kwargs): 11 | serializer = self.get_serializer(data=request.data) 12 | serializer.is_valid(raise_exception=True) 13 | user = serializer.save() 14 | return Response({ 15 | "user": UserSerializer(user, context=self.get_serializer_context()).data, 16 | "token": AuthToken.objects.create(user)[1] 17 | }) 18 | 19 | # Login API 20 | class LoginAPI(generics.GenericAPIView): 21 | serializer_class = LoginSerializer 22 | 23 | def post(self, request, *args, **kwargs): 24 | serializer = self.get_serializer(data=request.data) 25 | serializer.is_valid(raise_exception=True) 26 | user = serializer.validated_data 27 | _, token = AuthToken.objects.create(user) 28 | return Response({ 29 | "user": UserSerializer(user, context=self.get_serializer_context()).data, 30 | "token": token 31 | }) 32 | 33 | # Get User API 34 | class UserAPI(generics.RetrieveAPIView): 35 | permission_classes = [ 36 | permissions.IsAuthenticated, 37 | ] 38 | serializer_class = UserSerializer 39 | 40 | def get_object(self): 41 | return self.request.user 42 | -------------------------------------------------------------------------------- /assets/list_of_lists.txt: -------------------------------------------------------------------------------- 1 | https://github.com/EthicalML/awesome-production-machine-learning 2 | https://github.com/vinta/awesome-python 3 | https://github.com/JamzyWang/awesome-redis 4 | https://github.com/sorrycc/awesome-javascript 5 | https://github.com/uhub/awesome-javascript 6 | https://github.com/sindresorhus/awesome-nodejs 7 | https://github.com/markets/awesome-ruby 8 | https://github.com/sdogruyol/awesome-ruby 9 | https://github.com/gramantin/awesome-rails 10 | https://github.com/veggiemonk/awesome-docker 11 | https://github.com/mjhea0/awesome-flask 12 | https://github.com/humiaozuzu/awesome-flask 13 | https://github.com/wsvincent/awesome-django 14 | https://github.com/ramitsurana/awesome-kubernetes 15 | https://github.com/enaqx/awesome-react 16 | https://github.com/dzharii/awesome-typescript 17 | https://github.com/uralbash/awesome-pyramid 18 | https://github.com/krzjoa/awesome-python-data-science 19 | https://github.com/mjhea0/awesome-fastapi 20 | https://github.com/shahraizali/awesome-django 21 | https://github.com/ucg8j/awesome-dash 22 | https://github.com/typeddjango/awesome-python-typing 23 | https://github.com/faroit/awesome-python-scientific-audio 24 | https://github.com/timofurrer/awesome-asyncio 25 | https://github.com/hbokh/awesome-saltstack 26 | https://github.com/kolomied/awesome-cdk 27 | https://github.com/Cellane/awesome-vapor 28 | https://github.com/shuaibiyy/awesome-terraform 29 | https://github.com/phalcon/awesome-phalcon 30 | https://github.com/chiraggude/awesome-laravel 31 | https://github.com/iJackUA/awesome-vagrant 32 | https://github.com/0xnr/awesome-bigdata 33 | https://github.com/pmuens/awesome-serverless 34 | https://github.com/chentsulin/awesome-graphql 35 | -------------------------------------------------------------------------------- /marketplace.json: -------------------------------------------------------------------------------- 1 | { 2 | "app_name": "Awesome Search", 3 | "description": "Search awesome list resources directly from your CLI! using Search and Query feature of Redis", 4 | "rank": 85, 5 | "type": "Full App", 6 | "contributed_by": "Community", 7 | "repo_url": "https://github.com/redis-developer/awesome-search", 8 | "preview_image_url": "https://raw.githubusercontent.com/redis-developer/awesome-search/master/images/app_preview_image.png", 9 | "download_url": "https://github.com/redis-developer/awesome-search/archive/refs/heads/master.zip", 10 | "hosted_url": "", 11 | "quick_deploy": "false", 12 | "deploy_buttons": [ 13 | { 14 | "heroku": "https://heroku.com/deploy?template=https://github.com/redis-developer/awesome-search.git" 15 | }, 16 | { 17 | "Google": "https://deploy.cloud.run/?git_repo=https://github.com/redis-developer/awesome-search.git" 18 | } 19 | ], 20 | "language": [ 21 | "Python" 22 | ], 23 | "redis_commands": [ 24 | "SET", 25 | "SADD", 26 | "SMEMBERS", 27 | "FT.CREATE", 28 | "FT.SEARCH", 29 | "FT.DROPINDEX" 30 | ], 31 | "redis_use_cases": [ 32 | "Caching" 33 | ], 34 | "redis_features": [ 35 | "Search and Query" 36 | ], 37 | "app_image_urls": [ 38 | "https://raw.githubusercontent.com/redis-developer/awesome-search/master/assets/demo.png", 39 | "https://raw.githubusercontent.com/redis-developer/awesome-search/master/assets/diagram.png" 40 | ], 41 | "youtube_url": "https://www.youtube.com/watch?v=Hbpb-Y0dXPs", 42 | "special_tags": [ 43 | "Hackathon" 44 | ], 45 | "verticals": [ 46 | "Others" 47 | ], 48 | "markdown": "https://raw.githubusercontent.com/redis-developer/awesome-search/master/README.md" 49 | } -------------------------------------------------------------------------------- /searchapp/search/views.py: -------------------------------------------------------------------------------- 1 | from config.keys import Keys 2 | from config.settings import INDEX_NAME, KEY_PREFIX 3 | from django_redis import get_redis_connection 4 | from redis import Redis 5 | from redisearch import Client, Query 6 | from rest_framework.decorators import api_view 7 | from rest_framework.response import Response 8 | 9 | from .formatter import format_query 10 | 11 | keys = Keys(KEY_PREFIX) 12 | 13 | 14 | @api_view(['GET']) 15 | def general_search(request) -> Response: 16 | """ 17 | Default full text search on all resources if no sources are specified. 18 | 19 | Faceted search if sources are specified. 20 | 21 | **query**: Query to search. 22 | **source**: Multiple sources can be specifed. 23 | """ 24 | 25 | client = Client(INDEX_NAME, conn=get_redis_connection()) 26 | 27 | query = request.GET.get('query') 28 | sort_stars = request.GET.get('sort-stars') 29 | resources = request.GET.getlist('source') 30 | languages = request.GET.getlist('language') 31 | awesome_lists = request.GET.getlist('awesome-list') 32 | 33 | query = format_query(query, resources, languages, awesome_lists) 34 | results = client.search(Query(query)) 35 | results = [doc.__dict__ for doc in results.docs] 36 | if sort_stars == "true": 37 | results.sort(key=lambda x: int(x['stargazers_count']), reverse=True) 38 | 39 | return Response({ 40 | "docs": results 41 | }) 42 | 43 | 44 | @api_view(['GET']) 45 | def languages(request) -> Response: 46 | """ 47 | Returns list of languges. 48 | """ 49 | client = get_redis_connection() 50 | result = client.smembers(keys.language_list()) 51 | 52 | return Response({ 53 | "languages": result 54 | }) 55 | 56 | 57 | @api_view(['GET']) 58 | def awesome_lists(request) -> Response: 59 | """ 60 | Returns list of awesome lists. 61 | """ 62 | client = get_redis_connection() 63 | result = client.smembers(keys.awesome_list_list()) 64 | 65 | return Response({ 66 | "lists": result 67 | }) 68 | -------------------------------------------------------------------------------- /searchapp/search/tests.py: -------------------------------------------------------------------------------- 1 | from django.db.models import query 2 | from django.http import response 3 | from django.urls import reverse 4 | from rest_framework import status 5 | from rest_framework.test import APITestCase 6 | from .formatter import format_query, parse 7 | 8 | 9 | class GeneralSearchTests(APITestCase): 10 | 11 | def test_query_formatter(self): 12 | query = format_query('redis search', resources=['tweets', 'github'], 13 | languages=['Python', 'Ruby'], awesome_lists=['awesome-python', 'awesome-ruby']) 14 | target = "@source:(tweets|github) @language:(Python|Ruby) @lists:(awesome-python awesome-ruby) redis search" 15 | self.assertEquals(query, target) 16 | 17 | def test_query_parser(self): 18 | query = "redis-*search" 19 | query = parse(query) 20 | self.assertEqual(query, "redis*search") 21 | 22 | def test_general_search(self): 23 | url = reverse("general-search") 24 | response = self.client.get(url, {'query': 'awesome'}) 25 | self.assertEqual(response.status_code, status.HTTP_200_OK) 26 | self.assertIsNotNone(response.json()['docs']) 27 | 28 | def test_faceted_search(self): 29 | url = reverse("general-search") 30 | response = self.client.get(url, {'query': 'python', 'source': [ 31 | 'tweets', 'github'], 'language': ['Python', 'Ruby']}) 32 | self.assertEqual(response.status_code, status.HTTP_200_OK) 33 | self.assertIsNotNone(response.json()['docs']) 34 | 35 | def test_language_list(self): 36 | url = reverse("search-languages") 37 | response = self.client.get(url) 38 | self.assertIsNotNone(response.json()['languages']) 39 | self.assertEqual(response.status_code, status.HTTP_200_OK) 40 | 41 | def test_awesome_list_list(self): 42 | url = reverse("search-lists") 43 | response = self.client.get(url) 44 | self.assertIsNotNone(response.json()['lists']) 45 | self.assertEqual(response.status_code, status.HTTP_200_OK) 46 | -------------------------------------------------------------------------------- /searchapp/users/tests.py: -------------------------------------------------------------------------------- 1 | from django.contrib.auth.models import User 2 | from django.urls import reverse 3 | from knox.models import AuthToken 4 | from rest_framework import status 5 | from rest_framework.test import APIClient, APITestCase 6 | 7 | 8 | class UserLoginTests(APITestCase): 9 | 10 | def setUp(self): 11 | self.test_user = { 12 | 'username': 'admin', 13 | 'password': 'password123', 14 | 'email': 'admin@admin.com' 15 | } 16 | self.client = APIClient() 17 | user = User.objects.create_user(self.test_user['username'], 18 | self.test_user['email'], 19 | self.test_user['password']) 20 | self.token = AuthToken.objects.create(user) 21 | 22 | def test_login_user(self): 23 | url = reverse('login') 24 | response = self.client.post(url, data={ 25 | 'username': self.test_user['username'], 26 | 'password': self.test_user['password'], 27 | }) 28 | data = response.json() 29 | self.assertEqual(response.status_code, status.HTTP_200_OK) 30 | self.assertEqual(data['user']['id'], 1) 31 | self.assertEqual(data['user']['username'], self.test_user['username']) 32 | self.assertIsNotNone(data['token']) 33 | 34 | def test_deny_register_existing_user(self): 35 | url = reverse('register') 36 | response = self.client.post(url, data={ 37 | 'username': self.test_user['username'], 38 | 'password': self.test_user['password'], 39 | 'email': self.test_user['password'], 40 | }) 41 | self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) 42 | 43 | def test_register_user(self): 44 | url = reverse('register') 45 | response = self.client.post(url, data={ 46 | 'username': 'admin2', 47 | 'password': 'pass2', 48 | 'email': 'admin2@admin.com', 49 | }) 50 | data = response.json() 51 | self.assertEqual(response.status_code, status.HTTP_200_OK) 52 | self.assertIsNotNone(data['token']) 53 | self.assertEqual(data['user']['username'], 'admin2') 54 | -------------------------------------------------------------------------------- /raycast/awesome_search.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Required parameters: 4 | # @raycast.schemaVersion 1 5 | # @raycast.title Awesome 6 | # @raycast.mode fullOutput 7 | # @raycast.refreshTime 5m 8 | # @raycast.packageName Search 9 | 10 | # Optional parameters: 11 | # @raycast.icon 📖 12 | # @raycast.argument1 { "type": "text", "placeholder": "query", "optional": false } 13 | # @raycast.argument2 { "type": "text", "placeholder": "languages (e.g. python)", "optional": true } 14 | # @raycast.argument3 { "type": "text", "placeholder": "sort stars?", "optional": true } 15 | 16 | # Documentation: 17 | # @raycast.description Search awesome lists and more! 18 | # @raycast.author Marko Arezina 19 | # @raycast.authorURL https://github.com/mrkarezina 20 | 21 | import json 22 | import re 23 | import sys 24 | from typing import List 25 | from urllib.parse import quote 26 | from urllib.request import urlopen 27 | 28 | # API_URL = "http://127.0.0.1:8000" 29 | API_URL = "https://awesome-search-dot-graph-intelligence.uc.r.appspot.com" 30 | 31 | colors = { 32 | 'green': '\033[92m', 33 | 'red': '\033[91m', 34 | 'end': '\033[0m', 35 | 'yellow': '\033[93m', 36 | } 37 | 38 | 39 | def parse_list(arg: str) -> str: 40 | """ 41 | Parse script args containing a list of values. 42 | Splits tokens by any non-alphanumeric character. 43 | """ 44 | return re.split(r'\W+', arg) 45 | 46 | 47 | def format_url(query: str, languages: List[str], lists: List[str] = [], sort_stars: bool = False) -> str: 48 | query = quote(query) 49 | 50 | languages = [f'language={l}' for l in languages if l != ''] 51 | languages = "&".join(languages) 52 | 53 | lists = [f'awesome-list={l}' for l in lists if l != ''] 54 | lists = "&".join(lists) 55 | 56 | sort_stars = 'true' if sort_stars else 'false' 57 | 58 | return f"{API_URL}/search?query={query}&{languages}&{lists}&sort-stars={sort_stars}" 59 | 60 | 61 | if __name__ == "__main__": 62 | query = sys.argv[1] 63 | languages = parse_list(sys.argv[2]) 64 | sort_stars = sys.argv[3] 65 | 66 | # Default to sort by relevance 67 | if sort_stars.lower() not in ['', '0', 'false']: 68 | sort_stars = True 69 | else: 70 | sort_stars = False 71 | 72 | query_url = format_url(query, languages, sort_stars=sort_stars) 73 | 74 | try: 75 | with urlopen(query_url) as f: 76 | result = json.load(f) 77 | except: 78 | print('Failed loading resources ...') 79 | sys.exit(0) 80 | 81 | if len(result['docs']) == 0: 82 | print("No results found.") 83 | 84 | for doc in result['docs']: 85 | name, desc, stars, url = doc['repo_name'], doc['body'], doc['stargazers_count'], doc['svn_url'] 86 | print(f"{colors['green']}{name}{colors['end']} - {desc}") 87 | print(f"Stars {colors['yellow']}{stars}{colors['end']} {url}\n") 88 | -------------------------------------------------------------------------------- /searchapp/indexer/scrapers.py: -------------------------------------------------------------------------------- 1 | import re 2 | from time import sleep 3 | from typing import Dict, List 4 | from json.decoder import JSONDecodeError 5 | 6 | import requests 7 | from config.settings import GH_ACCESS_TOKEN 8 | 9 | MAX_THREADS = 4 10 | DELAY = 1 11 | URL_PATTERN = "(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s]{2,}|www\.[a-zA-Z0-9]+\.[^\s]{2,})" 12 | 13 | 14 | class RepoScraperError(Exception): 15 | pass 16 | 17 | 18 | class RepoScraper: 19 | """ 20 | Uses Github API to return data for a repo. 21 | """ 22 | 23 | def __init__(self, url: str): 24 | try: 25 | url = url.replace("https://github.com/", '') 26 | url = url.split('/') 27 | self.owner = url[0] 28 | self.repo = url[1].split(')', 1)[0] 29 | except: 30 | raise RepoScraperError() 31 | 32 | def get_repo_data(self) -> Dict: 33 | return requests.get(f"https://api.github.com/repos/{self.owner}/{self.repo}", headers={ 34 | 'Authorization': f'token {GH_ACCESS_TOKEN}', 35 | }).json() 36 | 37 | def get_readme_text(self) -> str: 38 | data = requests.get( 39 | f"https://api.github.com/repos/{self.owner}/{self.repo}/readme", headers={ 40 | 'Authorization': f'token {GH_ACCESS_TOKEN}', 41 | }).json() 42 | download = data['download_url'] 43 | return requests.get(download).text 44 | 45 | 46 | class AwesomeScrape: 47 | """ 48 | Scrapes repositories found on an awsome list. 49 | 50 | Saves the repository data into a Redis instance. 51 | """ 52 | 53 | def __init__(self, url: str): 54 | self.url = url 55 | self.repo_urls = self.__parse_urls() 56 | 57 | def __parse_urls(self) -> List[str]: 58 | """ 59 | Parses all Github repo URLs found on awsome list. 60 | """ 61 | raw_readme = RepoScraper(self.url).get_readme_text() 62 | urls = [] 63 | for match in re.finditer(re.compile(URL_PATTERN), raw_readme): 64 | url = match.group() 65 | if url.startswith("https://github.com/"): 66 | urls.append(url) 67 | return urls 68 | 69 | def scrape(self, max_num: int = None) -> List[Dict]: 70 | """ 71 | Scrapes data on all repositories found on awsome list. 72 | """ 73 | 74 | repo_data = [] 75 | 76 | def get_repo_data(url) -> Dict: 77 | data = RepoScraper(url).get_repo_data() 78 | # Test response contains repo 79 | data['name'] 80 | print(data['name'], data['description']) 81 | repo_data.append(data) 82 | 83 | urls = self.repo_urls[:max_num] if max_num is not None else self.repo_urls 84 | for url in urls: 85 | try: 86 | get_repo_data(url) 87 | sleep(DELAY) 88 | except (RepoScraperError, KeyError, JSONDecodeError): 89 | print(f"Invalid URL: {url}") 90 | 91 | return repo_data 92 | -------------------------------------------------------------------------------- /cli/awesome_search.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import re 4 | import sys 5 | from typing import List 6 | from urllib.parse import quote 7 | from urllib.request import urlopen 8 | 9 | API_URL = "https://awesome-search-dot-graph-intelligence.uc.r.appspot.com" 10 | 11 | colors = { 12 | 'green': '\033[92m', 13 | 'red': '\033[91m', 14 | 'end': '\033[0m', 15 | 'yellow': '\033[93m', 16 | } 17 | 18 | parser = argparse.ArgumentParser(description='Search preferences.') 19 | parser.add_argument( 20 | 'query', help="Query to search. Use quotes for queries with spaces.") 21 | parser.add_argument('-l', '--languages', default='', 22 | required=False, help='Comma delimited list of programming languages to filter by.') 23 | parser.add_argument('--lists', default='', 24 | required=False, help='Comma delimited list of terms filtering which awesome lists to search. E.g: django matches awesome-django.') 25 | parser.add_argument('-s', '--stars', action='store_const', 26 | const=True, default=False, help='Toggle to sort result by stars.') 27 | parser.add_argument('-r', '--results', default=5, type=int, 28 | required=False, help='Results to display.') 29 | 30 | 31 | def parse_list(arg: str) -> List[str]: 32 | """ 33 | Parse script args containing a list of values. 34 | Splits tokens by any non-alphanumeric character. 35 | """ 36 | return re.split(r'\W+', arg) 37 | 38 | 39 | def format_url(query: str, languages: List[str], lists: List[str] = [], sort_stars: bool = False) -> str: 40 | query = quote(query) 41 | 42 | languages = [f'language={l}' for l in languages if l != ''] 43 | languages = "&".join(languages) 44 | 45 | lists = [f'awesome-list={l}' for l in lists if l != ''] 46 | lists = "&".join(lists) 47 | 48 | sort_stars = 'true' if sort_stars else 'false' 49 | 50 | return f"{API_URL}/search?query={query}&{languages}&{lists}&sort-stars={sort_stars}" 51 | 52 | 53 | def fetch_results(query_url: str) -> List[dict]: 54 | try: 55 | with urlopen(query_url) as f: 56 | result = json.load(f) 57 | except: 58 | print('Failed loading results ...') 59 | sys.exit(0) 60 | return result['docs'] 61 | 62 | 63 | def format_results(docs: List[dict], results: int) -> str: 64 | s = "" 65 | for doc in docs[:results]: 66 | name, desc, stars, url = doc['repo_name'], doc['body'], doc['stargazers_count'], doc['svn_url'] 67 | s += f"{colors['green']}{name}{colors['end']} - {desc}\n" 68 | s += f"Stars {colors['yellow']}{stars}{colors['end']} {url}\n\n" 69 | return s 70 | 71 | 72 | def format_error(args: argparse.Namespace) -> str: 73 | s = f"No results found for: \"{args.query}\"" 74 | if args.languages != "": 75 | s += f" | Written in: \"{args.languages}\"" 76 | if args.lists != "": 77 | s += f" | Featured on: \"{args.lists}\" lists" 78 | return s 79 | 80 | 81 | def main(): 82 | args = parser.parse_args() 83 | query_url = format_url(args.query, 84 | parse_list(args.languages), 85 | parse_list(args.lists), sort_stars=args.stars) 86 | docs = fetch_results(query_url) 87 | if len(docs) == 0: 88 | print(format_error(args)) 89 | else: 90 | print(format_results(docs, args.results)) 91 | -------------------------------------------------------------------------------- /searchapp/indexer/index.py: -------------------------------------------------------------------------------- 1 | 2 | from typing import List 3 | 4 | from config.keys import Keys 5 | from config.settings import (INDEX_NAME, KEY_PREFIX, REDIS_HOST, 6 | REDIS_PASSWORD, REDIS_PORT, MAX_RES_PER_LIST) 7 | from redis.exceptions import DataError, ResponseError 8 | from redisearch import Client, IndexDefinition, TextField 9 | from redisearch.client import TagField 10 | 11 | from .scrapers import AwesomeScrape, RepoScraper 12 | 13 | 14 | class Indexer: 15 | """ 16 | Scrapes repos found on awesome lists. Inserts repo data into Redis. 17 | 18 | **urls**: List of Github awsome lists URLs. 19 | """ 20 | 21 | def __init__(self, urls: List[str], max_per_list: int = MAX_RES_PER_LIST): 22 | self.urls = urls 23 | self.client = Client(INDEX_NAME, host=REDIS_HOST, 24 | port=REDIS_PORT, password=REDIS_PASSWORD) 25 | self.keys = Keys(KEY_PREFIX) 26 | self.max = max_per_list 27 | 28 | def create_index_definition(self, drop_existing=False): 29 | """ 30 | Create an index definition. Do nothing if it already exists. 31 | """ 32 | 33 | if drop_existing: 34 | self.client.drop_index() 35 | 36 | definition = IndexDefinition(prefix=[self.keys.pre("resource:")]) 37 | try: 38 | self.client.create_index([TextField('body', weight=1), 39 | TextField('repo_name', weight=1.5), 40 | TextField('language', weight=1), 41 | TextField('lists')], definition=definition) 42 | except ResponseError: 43 | print("Index already exists.") 44 | 45 | def index(self): 46 | """ 47 | Insert scraped resources into Redis. 48 | """ 49 | for url in self.urls: 50 | parent = RepoScraper(url) 51 | print(f"Creating index for {parent.repo}") 52 | 53 | self.client.redis.sadd(self.keys.awesome_list_list(), parent.repo) 54 | resources = AwesomeScrape(url).scrape(max_num=self.max) 55 | 56 | # Create set of all awesome lists a repo appears on 57 | # Required to set tag feild 58 | for resource in resources: 59 | try: 60 | self.client.redis.sadd(self.keys.github_repo_lists(resource['owner']['login'], resource['name']), 61 | parent.repo) 62 | except (KeyError, DataError): 63 | pass 64 | 65 | for resource in resources: 66 | try: 67 | if resource['language'] is not None: 68 | language = resource['language'] 69 | self.client.redis.sadd(self.keys.language_list(), 70 | language) 71 | else: 72 | language = '' 73 | 74 | lists = self.client.redis.smembers(self.keys.github_repo_lists(resource['owner']['login'], 75 | resource['name'])) 76 | 77 | self.client.redis.hset(self.keys.github_repo(resource['owner']['login'], resource['name']), 78 | mapping={ 79 | 'repo_name': resource['name'], 80 | 'lists': ", ".join(lists), 81 | 'body': resource['description'], 82 | 'stargazers_count': resource['stargazers_count'], 83 | 'language': language, 84 | 'svn_url': resource['svn_url'] 85 | }) 86 | 87 | except (KeyError, DataError): 88 | print(f"Resource missing data: f{resource}") 89 | 90 | 91 | if __name__ == "__main__": 92 | with open("../assets/list_of_lists.txt") as lists: 93 | indexer = Indexer(lists.read().splitlines(), max_per_list=MAX_RES_PER_LIST) 94 | indexer.create_index_definition() 95 | indexer.index() 96 | -------------------------------------------------------------------------------- /searchapp/config/settings.py: -------------------------------------------------------------------------------- 1 | """ 2 | Django settings for searchapp project. 3 | 4 | Generated by 'django-admin startproject' using Django 3.2. 5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/3.2/topics/settings/ 8 | 9 | For the full list of settings and their values, see 10 | https://docs.djangoproject.com/en/3.2/ref/settings/ 11 | """ 12 | 13 | from pathlib import Path 14 | from configparser import ConfigParser 15 | import os 16 | 17 | # Build paths inside the project like this: BASE_DIR / 'subdir'. 18 | BASE_DIR = Path(__file__).resolve().parent.parent 19 | CONFIG_DIR = BASE_DIR 20 | 21 | config = ConfigParser() 22 | config.read(os.path.join(CONFIG_DIR, "config.ini")) 23 | 24 | REDIS_HOST = config.get('redis', 'HOST') 25 | REDIS_PORT = config.get('redis', 'PORT') 26 | REDIS_PASSWORD = config.get('redis', 'PASSWORD') 27 | # REDIS_PASSWORD = None 28 | REDIS_URL = f'redis://{REDIS_HOST}:{REDIS_PORT}/0' 29 | INDEX_NAME = "golden_search" 30 | KEY_PREFIX = "awesome" 31 | 32 | API_KEY = config.get('tweepy', 'API_KEY') 33 | API_SECRET_KEY = config.get('tweepy', 'API_SECRET_KEY') 34 | 35 | ACCESS_TOKEN = config.get('tweepy', 'ACCESS_TOKEN') 36 | ACCESS_TOKEN_SECRET = config.get('tweepy', 'ACCESS_TOKEN_SECRET') 37 | 38 | GH_ACCESS_TOKEN = config.get('github', 'ACCESS_TOKEN') 39 | 40 | AWESOME_LISTS = [] 41 | # Number of resources to include per list 42 | MAX_RES_PER_LIST = 300 43 | 44 | 45 | # Quick-start development settings - unsuitable for production 46 | # See https://docs.djangoproject.com/en/3.2/howto/deployment/checklist/ 47 | 48 | # 49 | if os.getenv('GAE_APPLICATION', None): 50 | # SECURITY WARNING: don't run with debug turned on in production! 51 | DEBUG = False 52 | # SECURITY WARNING: keep the secret key used in production secret! 53 | SECRET_KEY = config.get('prod', 'SECRET_KEY').strip() 54 | else: 55 | DEBUG = True 56 | SECRET_KEY = 'django-insecure-93_n5luo&94acd^2aaal(w^00l22%w)6dys(hkr!8f9ga#i97k' 57 | 58 | # SECURITY WARNING: App Engine's security features ensure that it is safe to 59 | # have ALLOWED_HOSTS = ['*'] when the app is deployed. If you deploy a Django 60 | # app not on App Engine, make sure to set an appropriate host here. 61 | # See https://docs.djangoproject.com/en/2.1/ref/settings/ 62 | ALLOWED_HOSTS = ['*'] 63 | 64 | 65 | # Application definition 66 | 67 | INSTALLED_APPS = [ 68 | 'django.contrib.admin', 69 | 'django.contrib.auth', 70 | 'django.contrib.contenttypes', 71 | 'django.contrib.sessions', 72 | 'django.contrib.messages', 73 | 'django.contrib.staticfiles', 74 | 'corsheaders', 75 | 'rest_framework', 76 | 'rest_framework_nested', 77 | 'users', 78 | 'search', 79 | 'knox' 80 | ] 81 | 82 | REST_FRAMEWORK = { 83 | 'DEFAULT_AUTHENTICATION_CLASSES': ('knox.auth.TokenAuthentication',) 84 | } 85 | 86 | MIDDLEWARE = [ 87 | 'corsheaders.middleware.CorsMiddleware', 88 | 'django.middleware.security.SecurityMiddleware', 89 | 'django.contrib.sessions.middleware.SessionMiddleware', 90 | 'django.middleware.common.CommonMiddleware', 91 | 'django.middleware.csrf.CsrfViewMiddleware', 92 | 'django.contrib.auth.middleware.AuthenticationMiddleware', 93 | 'django.contrib.messages.middleware.MessageMiddleware', 94 | 'django.middleware.clickjacking.XFrameOptionsMiddleware', 95 | ] 96 | 97 | ROOT_URLCONF = 'config.urls' 98 | 99 | TEMPLATES = [ 100 | { 101 | 'BACKEND': 'django.template.backends.django.DjangoTemplates', 102 | 'DIRS': [], 103 | 'APP_DIRS': True, 104 | 'OPTIONS': { 105 | 'context_processors': [ 106 | 'django.template.context_processors.debug', 107 | 'django.template.context_processors.request', 108 | 'django.contrib.auth.context_processors.auth', 109 | 'django.contrib.messages.context_processors.messages', 110 | ], 111 | }, 112 | }, 113 | ] 114 | 115 | WSGI_APPLICATION = 'config.wsgi.application' 116 | 117 | 118 | 119 | # Database 120 | # https://docs.djangoproject.com/en/3.2/ref/settings/#databases 121 | if os.getenv('GAE_APPLICATION', None): 122 | # Install PyMySQL as mysqlclient/MySQLdb to use Django's mysqlclient adapter 123 | # See https://docs.djangoproject.com/en/2.1/ref/databases/#mysql-db-api-drivers 124 | # for more information 125 | import pymysql # noqa: 402 126 | pymysql.version_info = (1, 4, 6, 'final', 0) # change mysqlclient version 127 | pymysql.install_as_MySQLdb() 128 | 129 | # Running on production App Engine, so connect to Google Cloud SQL using 130 | # the unix socket at /cloudsql/ 131 | DATABASES = { 132 | 'default': { 133 | 'ENGINE': 'django.db.backends.mysql', 134 | 'HOST': f"/cloudsql/{config.get('prod', 'MYSQL_CONNECTION_NAME')}", 135 | 'USER': config.get('prod', 'MYSQL_USER'), 136 | 'PASSWORD': config.get('prod', 'MYSQL_PASS'), 137 | 'NAME': config.get('prod', 'MYSQL_NAME'), 138 | } 139 | } 140 | else: 141 | # Running locally so connect to either a local MySQL instance or connect to 142 | # Cloud SQL via the proxy. To start the proxy via command line: 143 | # 144 | # $ cloud_sql_proxy -instances=[INSTANCE_CONNECTION_NAME]=tcp:3306 145 | # 146 | # See https://cloud.google.com/sql/docs/mysql-connect-proxy 147 | DATABASES = { 148 | 'default': { 149 | 'ENGINE': 'django.db.backends.sqlite3', 150 | 'NAME': BASE_DIR / 'db.sqlite3', 151 | } 152 | } 153 | 154 | 155 | CACHES = { 156 | "default": { 157 | "BACKEND": "django_redis.cache.RedisCache", 158 | "LOCATION": REDIS_URL, 159 | "OPTIONS": { 160 | "CLIENT_CLASS": "django_redis.client.DefaultClient", 161 | "PASSWORD": REDIS_PASSWORD 162 | } 163 | } 164 | } 165 | 166 | RQ_QUEUES = { 167 | 'default': { 168 | 'USE_REDIS_CACHE': 'redis-cache' 169 | } 170 | } 171 | 172 | # Password validation 173 | # https://docs.djangoproject.com/en/3.2/ref/settings/#auth-password-validators 174 | 175 | AUTH_PASSWORD_VALIDATORS = [ 176 | { 177 | 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', 178 | }, 179 | { 180 | 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', 181 | }, 182 | { 183 | 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', 184 | }, 185 | { 186 | 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', 187 | }, 188 | ] 189 | 190 | 191 | # Internationalization 192 | # https://docs.djangoproject.com/en/3.2/topics/i18n/ 193 | 194 | LANGUAGE_CODE = 'en-us' 195 | 196 | TIME_ZONE = 'UTC' 197 | 198 | USE_I18N = True 199 | 200 | USE_L10N = True 201 | 202 | USE_TZ = True 203 | 204 | 205 | # Static files (CSS, JavaScript, Images) 206 | # https://docs.djangoproject.com/en/3.2/howto/static-files/ 207 | 208 | STATIC_URL = '/static/' 209 | 210 | # Default primary key field type 211 | # https://docs.djangoproject.com/en/3.2/ref/settings/#default-auto-field 212 | 213 | DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField' 214 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Awesome Search 2 | 3 | Find and index quality [awesome list](https://github.com/redis-developer/awesome) resources directly from [Raycast](https://raycast.com/) or your CLI! 4 | 5 | Powered by blazing fast [Redis Search](https://oss.redislabs.com/redisearch/). 6 | 7 | 8 | ![Demo](https://raw.githubusercontent.com/redis-developer/awesome-search/master/assets/demo.png) 9 | 10 | 11 | ## Why 12 | Search results are frequently SEO'd to death. Results are full of low quality tutorials and blogs, making it hard to find the golden resources and niche blogs in all the noise. 13 | 14 | The goal of Awesome Search is to build a tool to find high quality resources amidst all the noise. Awesome Search is not meant to act like Google which is great for just about anything, rather focus on curated resources and niche blogs that might not rank as high on Google. 15 | 16 | Currently the prototype features searching across projects featured on awesome lists. 17 | 18 | 19 | ## Features 20 | - Search projects across awesome lists. 21 | - Customize search preferences. 22 | - Submit an awesome list for indexing. 23 | 24 | 25 | ## Next steps 26 | 27 | Indexing engineering blogs which might not rank as high in search results. For an example check out [this](https://cse.google.com/cse?cx=7170ef95a8051e78a) programmable search engine which only indexes engineering blogs on [this awesome list](https://github.com/kilimchoi/engineering-blogs). 28 | 29 | There is also a Users module currently in the Django app. This module is for creating an API key that users can save to the cli app. This allows for restricting accounts that can index new lists thus reducing spam. 30 | 31 | 32 | ## Stack 33 | - CLI - *Python*, *Raycast* 34 | - Backend - *Django*, *Redis * 35 | 36 | 37 | 38 | ## Installation 39 | 40 | 41 | ### CLI 42 | 43 | Create and activate a python virtual environment. 44 | ``` 45 | python -m venv venv 46 | ``` 47 | ``` 48 | source venv/bin/activate 49 | ``` 50 | 51 | Then: 52 | 53 | ``` 54 | pip install awesome-search 55 | ``` 56 | 57 | Usage 58 | ``` 59 | awesome "[query]" 60 | ``` 61 | 62 | Example search django redis projects, sort top results by stars. 63 | ``` 64 | awesome "django redis" -l python -s 65 | ``` 66 | 67 | #### Options 68 | 69 | Comma delimited list of languages. 70 | ``` 71 | --languages python,javascript 72 | ``` 73 | 74 | Comma delimited list of terms to filter awesome lists results appear on. E.g "redis,django" for awesome-redis, awesome-django. 75 | ``` 76 | --lists [terms] 77 | ``` 78 | 79 | Sort results by stars. 80 | ``` 81 | --stars 82 | ``` 83 | 84 | Hits to return. 85 | ``` 86 | --results 5 87 | ``` 88 | 89 | 90 | ### Raycast 91 | 92 | To add the script follow the instructions on the [Raycast script commands page](https://github.com/raycast/script-commands). 93 | 94 | If you already have a script directory for your Raycast scripts simply copy the `raycast/awesome_search.py` script to it. 95 | 96 | 97 | ## How it works 98 | Resources across different sources are stored in a variety of keys and data types using Redis. 99 | 100 | Resource data is stored as a JSON sterilized string. 101 | 102 | [django-redis](https://github.com/jazzband/django-redis) is used to configure Redis as the backend for Django's cache. This allows for neatly managing the connection for the [redis-py](https://github.com/andymccurdy/redis-py) and [redisearch-py](https://github.com/RediSearch/redisearch-py) client instances using `get_redis_connection()`. 103 | 104 | Redis Queue is used to submit new indexing jobs. 105 | 106 | 107 | ### Architecture 108 | 109 | ![Diagram](https://raw.githubusercontent.com/redis-developer/awesome-search/master/assets/diagram.png) 110 | 111 | ### Schema 112 | 113 | All types of resources are prefixed with `resource:`. This gives flexibility in extending to new resource types such as blogs. 114 | 115 | ### Github Repos 116 | 117 | We use a set to track which awesome lists a repository appears on. After indexing the contents of the set are added as a document property for [filtering search results]((https://oss.redislabs.com/redisearch/Query_Syntax/#field_modifiers)) by awesome list. 118 | ``` 119 | SADD resource:github:{owner}:{repo_name}:lists {list} 120 | ``` 121 | 122 | ``` 123 | SET resource:github:{owner}:{repo_name} 124 | { 125 | 'repo_name': resource['name'], 126 | 'lists': # SMEMBERS resource:github:{owner}:{repo_name}:lists 127 | 'body': resource['description'], 128 | 'stargazers_count': resource['stargazers_count'], 129 | 'language': resource['language'], 130 | 'svn_url': resource['svn_url'] 131 | } 132 | ``` 133 | 134 | Additionally when inserting a new resource, maintain a list of unique awesome lists and languages to implement faceted search. 135 | 136 | ``` 137 | SADD resource:data:languages {language} 138 | ``` 139 | 140 | ``` 141 | SADD resource:data:awesome_lists {list} 142 | ``` 143 | 144 | 145 | ### Search 146 | 147 | #### Index 148 | All keys storing resource data are prefixed with `resource:`. This allows for easily defining aRedis Searchindex with all the different resource types we want to search. 149 | 150 | ```python 151 | definition = IndexDefinition(prefix=['resource:']) 152 | ``` 153 | 154 | Optionally if only specific resources such as Github Repos were to be indexed more specific prefixes could be specified: `prefix=['resource:github']`. 155 | 156 | Before making any queries the index needs to be built. 157 | ```python 158 | self.client.create_index([TextField('body', weight=1), 159 | TextField('repo_name', weight=1.5), 160 | TextField('language', weight=1), 161 | TagField('lists')], definition=definition) 162 | 163 | ``` 164 | This specifies which fields should be indexed. Additionally the weight argument allows for increasing the effect of matches in certain fields such as "repo_name". 165 | 166 | Once the index is created documents are indexed in real time as they are added to Redis. To add new documents to the index simply create a hash for that document. 167 | 168 | 169 | #### General Search 170 | 171 | ``` 172 | GET /search?query= 173 | ``` 174 | 175 | Full text search across all the resources. 176 | 177 | ``` 178 | FT.SEARCH {index} {query} 179 | ``` 180 | 181 | 182 | #### Faceted Search 183 | 184 | ``` 185 | GET /search?query=&source=&language=&awesome-list= 186 | ``` 187 | 188 | Redisearch supports [field modifiers](https://oss.redislabs.com/redisearch/Query_Syntax/#field_modifiers) in the query. Modifiers can be combined to implement filtering on multiple fields. We use field modifiers to implement faceted search on specific sources, languages, awesome lists. 189 | 190 | ``` 191 | FT.SEARCH {index} @resouce:(tweets|github) @language:(Python|C) @awesome_list:(awesome-python) {query} 192 | ``` 193 | 194 | 195 | Alternatively instead of specifying the source (ie: tweet or github) as a field modifier separate indexes could be built for each source, by providing a more specific key prefix. Ie: 196 | ``` 197 | definition_git = IndexDefinition(prefix=['resource:github']) 198 | definition_tweet = IndexDefinition(prefix=['resource:tweet']) 199 | ``` 200 | 201 | The separate indexes would result in faster queries but introduce additional complexity for ranking / pagination if the user chooses to search across both sources. 202 | 203 | ## Development 204 | 205 | ### Python 206 | 207 | First `cd searchapp`. 208 | 209 | Create a new python virtual environment. 210 | ``` 211 | python -m venv venv 212 | ``` 213 | 214 | Activate the virtual environment. 215 | ``` 216 | source venv/bin/activate 217 | ``` 218 | 219 | Install python dependecies 220 | ``` 221 | pip install -r requirements.txt 222 | ``` 223 | 224 | 225 | ### Redis 226 | 227 | Start a Docker container running the Redis instance with Redis Search. 228 | ``` 229 | docker run -d -p 6379:6379 redislabs/redisearch:2.0.0 230 | ``` 231 | 232 | ### Config 233 | 234 | The default `config.ini.sample` values are for local development. 235 | 236 | Request a personal access token for the Github API [here](https://github.com/settings/tokens). 237 | 238 | Copy / set the appropriate keys into `config.ini`. 239 | 240 | 241 | ### Seed database 242 | Once Redis is up and running seed the database with some awesome list data. 243 | 244 | In `assets/list_of_lists.txt` configure which awesome lists you would like to scrape and the maximum number of repos to insert per list. To limit the number of projects scraped decrease `MAX_RES_PER_LIST` in `settings.py`. 245 | 246 | ``` 247 | python -m indexer.index 248 | ``` 249 | 250 | ### Django 251 | 252 | Run tests. 253 | ``` 254 | python manage.py test 255 | ``` 256 | 257 | Start the django server. 258 | ``` 259 | python manage.py runserver 260 | ``` 261 | 262 | ### CLI 263 | 264 | Run the following commands in the awesome-search project root. 265 | 266 | Install the CLI for testing locally. 267 | ``` 268 | python setup.py install 269 | ``` 270 | 271 | Run tests. 272 | ``` 273 | python -m unittest 274 | ``` 275 | 276 | If using Raycast any changes in the script will automatically be reflected. Simply run the script again to debug any changes. 277 | 278 | 279 | ## Deployment 280 | 281 | ### Redis 282 | 283 | Create a Redis instance on [Redis Cloud](https://redislabs.com/redis-enterprise-cloud/overview/). Set the port, host, and password of your instance in the redis section of the `searchapp/config.ini`. 284 | 285 | 286 | 287 | ### Backend 288 | 289 | For detailed steps for deploying Django on App Engine see the official [documentation](https://cloud.google.com/python/django/appengine). 290 | 291 | In the `searchapp/` root. 292 | 293 | Set your project ID: 294 | ``` 295 | gcloud config set project my-project-id 296 | ``` 297 | 298 | [Create a MySQL database](https://cloud.google.com/python/django/appengine#creating_a_cloud_sql_instance). Then set the connection string / password in the deployment `config.ini`. 299 | 300 | To deploy run 301 | ``` 302 | gcloud app deploy 303 | ``` 304 | 305 | ### CLI 306 | 307 | Create dist bundle. 308 | ``` 309 | python setup.py sdist 310 | ``` 311 | 312 | Push to PyPi 313 | ``` 314 | twine upload dist/* 315 | ``` 316 | --------------------------------------------------------------------------------