├── .gitattributes
├── .gitignore
├── CONTRIBUTING.md
├── LICENSE.txt
├── README.md
├── resources
│   └── flash_cards
│       ├── OO Design.apkg
│       ├── System Design Exercises.apkg
│       └── System Design.apkg
└── solutions
    ├── object_oriented_design
    │   ├── call_center
    │   │   ├── __init__.py
    │   │   ├── call_center.ipynb
    │   │   └── call_center.py
    │   ├── deck_of_cards
    │   │   ├── __init__.py
    │   │   ├── deck_of_cards.ipynb
    │   │   └── deck_of_cards.py
    │   ├── hash_table
    │   │   ├── __init__.py
    │   │   ├── hash_map.ipynb
    │   │   └── hash_map.py
    │   ├── lru_cache
    │   │   ├── __init__.py
    │   │   ├── lru_cache.ipynb
    │   │   └── lru_cache.py
    │   ├── online_chat
    │   │   ├── __init__.py
    │   │   ├── online_chat.ipynb
    │   │   └── online_chat.py
    │   └── parking_lot
    │       ├── __init__.py
    │       ├── parking_lot.ipynb
    │       └── parking_lot.py
    └── system_design
        ├── mint
        │   ├── README.md
        │   ├── __init__.py
        │   ├── mint.png
        │   ├── mint_basic.png
        │   ├── mint_mapreduce.py
        │   └── mint_snippets.py
        ├── pastebin
        │   ├── README.md
        │   ├── __init__.py
        │   ├── pastebin.png
        │   ├── pastebin.py
        │   └── pastebin_basic.png
        ├── query_cache
        │   ├── README.md
        │   ├── __init__.py
        │   ├── query_cache.png
        │   ├── query_cache_basic.png
        │   └── query_cache_snippets.py
        ├── sales_rank
        │   ├── README.md
        │   ├── __init__.py
        │   ├── sales_rank.png
        │   ├── sales_rank_basic.png
        │   └── sales_rank_mapreduce.py
        ├── scaling_aws
        │   ├── README.md
        │   ├── scaling_aws.png
        │   ├── scaling_aws_1.png
        │   ├── scaling_aws_2.png
        │   ├── scaling_aws_3.png
        │   ├── scaling_aws_4.png
        │   ├── scaling_aws_5.png
        │   ├── scaling_aws_6.png
        │   └── scaling_aws_7.png
        ├── social_graph
        │   ├── README.md
        │   ├── __init__.py
        │   ├── social_graph.png
        │   ├── social_graph_basic.png
        │   └── social_graph_snippets.py
        ├── twitter
        │   ├── README.md
        │   ├── twitter.png
        │   └── twitter_basic.png
        └── web_crawler
            ├── README.md
            ├── __init__.py
            ├── web_crawler.png
            ├── web_crawler_basic.png
            ├── web_crawler_mapreduce.py
            └── web_crawler_snippets.py
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.ipynb linguist-language=Python
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 |
5 | # C extensions
6 | *.so
7 |
8 | # Distribution / packaging
9 | .Python
10 | env/
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | lib/
17 | lib64/
18 | parts/
19 | sdist/
20 | var/
21 | *.egg-info/
22 | .installed.cfg
23 | *.egg
24 |
25 | # PyInstaller
26 | # Usually these files are written by a python script from a template
27 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
28 | *.manifest
29 | *.spec
30 |
31 | # Installer logs
32 | pip-log.txt
33 | pip-delete-this-directory.txt
34 |
35 | # Unit test / coverage reports
36 | htmlcov/
37 | .tox/
38 | .coverage
39 | .cache
40 | nosetests.xml
41 | coverage.xml
42 |
43 | # Translations
44 | *.mo
45 | *.pot
46 |
47 | # Django stuff:
48 | *.log
49 |
50 | # Sphinx documentation
51 | docs/_build/
52 |
53 | # PyBuilder
54 | target/
55 |
56 | # IPython notebook
57 | .ipynb_checkpoints
58 |
59 | # Repo scratch directory
60 | scratch/
61 |
62 | # IPython Notebook templates
63 | template.ipynb
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | Contributing
2 | ============
3 |
4 | Contributions are welcome!
5 |
6 | **Please carefully read this page to make the code review process go as smoothly as possible and to maximize the likelihood of your contribution being merged.**
7 |
8 | ## Bug Reports
9 |
10 | For bug reports or requests [submit an issue](https://github.com/donnemartin/system-design-primer/issues).
11 |
12 | ## Pull Requests
13 |
14 | The preferred way to contribute is to fork the
15 | [main repository](https://github.com/donnemartin/system-design-primer) on GitHub.
16 |
17 | 1. Fork the [main repository](https://github.com/donnemartin/system-design-primer). Click on the 'Fork' button near the top of the page. This creates a copy of the code under your account on the GitHub server.
18 |
19 | 2. Clone this copy to your local disk:
20 |
21 | $ git clone git@github.com:YourLogin/system-design-primer.git
22 | $ cd system-design-primer
23 |
24 | 3. Create a branch to hold your changes and start making changes. Don't work in the `master` branch!
25 |
26 | $ git checkout -b my-feature
27 |
28 | 4. Work on this copy on your computer using Git to do the version control. When you're done editing, run the following to record your changes in Git:
29 |
30 | $ git add modified_files
31 | $ git commit
32 |
33 | 5. Push your changes to GitHub with:
34 |
35 | $ git push -u origin my-feature
36 |
37 | 6. Finally, go to the web page of your fork of the `system-design-primer` repo and click 'Pull Request' to send your changes for review.
38 |
39 | ### GitHub Pull Requests Docs
40 |
41 | If you are not familiar with pull requests, review the [pull request docs](https://help.github.com/articles/using-pull-requests/).
42 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Creative Commons Attribution 4.0 International License (CC BY 4.0)
2 |
3 | http://creativecommons.org/licenses/by/4.0/
4 |
--------------------------------------------------------------------------------
/resources/flash_cards/OO Design.apkg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mutablealligator/system-design-primer/0889f04f0d8162b6513437d3171a8ebb9ef5e504/resources/flash_cards/OO Design.apkg
--------------------------------------------------------------------------------
/resources/flash_cards/System Design Exercises.apkg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mutablealligator/system-design-primer/0889f04f0d8162b6513437d3171a8ebb9ef5e504/resources/flash_cards/System Design Exercises.apkg
--------------------------------------------------------------------------------
/resources/flash_cards/System Design.apkg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mutablealligator/system-design-primer/0889f04f0d8162b6513437d3171a8ebb9ef5e504/resources/flash_cards/System Design.apkg
--------------------------------------------------------------------------------
/solutions/object_oriented_design/call_center/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mutablealligator/system-design-primer/0889f04f0d8162b6513437d3171a8ebb9ef5e504/solutions/object_oriented_design/call_center/__init__.py
--------------------------------------------------------------------------------
/solutions/object_oriented_design/call_center/call_center.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 |     "This notebook was prepared by [Donne Martin](https://github.com/donnemartin). Source and license info is on [GitHub](https://github.com/donnemartin/system-design-primer)."
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "# Design a call center"
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "## Constraints and assumptions\n",
22 | "\n",
23 | "* What levels of employees are in the call center?\n",
24 | " * Operator, supervisor, director\n",
25 | "* Can we assume operators always get the initial calls?\n",
26 | " * Yes\n",
27 |     "* If there are no free operators or the operator can't handle the call, does the call go to the supervisors?\n",
28 | " * Yes\n",
29 |     "* If there are no free supervisors or the supervisor can't handle the call, does the call go to the directors?\n",
30 | " * Yes\n",
31 | "* Can we assume the directors can handle all calls?\n",
32 | " * Yes\n",
33 | "* What happens if nobody can answer the call?\n",
34 | " * It gets queued\n",
35 | "* Do we need to handle 'VIP' calls where we put someone to the front of the line?\n",
36 | " * No\n",
37 | "* Can we assume inputs are valid or do we have to validate them?\n",
38 | " * Assume they're valid"
39 | ]
40 | },
41 | {
42 | "cell_type": "markdown",
43 | "metadata": {},
44 | "source": [
45 | "## Solution"
46 | ]
47 | },
48 | {
49 | "cell_type": "code",
50 | "execution_count": 1,
51 | "metadata": {
52 | "collapsed": false
53 | },
54 | "outputs": [
55 | {
56 | "name": "stdout",
57 | "output_type": "stream",
58 | "text": [
59 | "Overwriting call_center.py\n"
60 | ]
61 | }
62 | ],
63 | "source": [
64 | "%%writefile call_center.py\n",
65 | "from abc import ABCMeta, abstractmethod\n",
66 | "from collections import deque\n",
67 | "from enum import Enum\n",
68 | "\n",
69 | "\n",
70 | "class Rank(Enum):\n",
71 | "\n",
72 | " OPERATOR = 0\n",
73 | " SUPERVISOR = 1\n",
74 | " DIRECTOR = 2\n",
75 | "\n",
76 | "\n",
77 | "class Employee(metaclass=ABCMeta):\n",
78 | "\n",
79 | " def __init__(self, employee_id, name, rank, call_center):\n",
80 | " self.employee_id = employee_id\n",
81 | " self.name = name\n",
82 | " self.rank = rank\n",
83 | " self.call = None\n",
84 | " self.call_center = call_center\n",
85 | "\n",
86 | " def take_call(self, call):\n",
87 | " \"\"\"Assume the employee will always successfully take the call.\"\"\"\n",
88 | " self.call = call\n",
89 | " self.call.employee = self\n",
90 | " self.call.state = CallState.IN_PROGRESS\n",
91 | "\n",
92 | " def complete_call(self):\n",
93 | " self.call.state = CallState.COMPLETE\n",
94 | " self.call_center.notify_call_completed(self.call)\n",
95 | "\n",
96 | " @abstractmethod\n",
97 | " def escalate_call(self):\n",
98 | " pass\n",
99 | "\n",
100 | " def _escalate_call(self):\n",
101 | " self.call.state = CallState.READY\n",
102 | " call = self.call\n",
103 | " self.call = None\n",
104 | " self.call_center.notify_call_escalated(call)\n",
105 | "\n",
106 | "\n",
107 | "class Operator(Employee):\n",
108 | "\n",
109 |     "    def __init__(self, employee_id, name, call_center):\n",
110 |     "        super(Operator, self).__init__(employee_id, name, Rank.OPERATOR, call_center)\n",
111 | "\n",
112 | " def escalate_call(self):\n",
113 | " self.call.level = Rank.SUPERVISOR\n",
114 | " self._escalate_call()\n",
115 | "\n",
116 | "\n",
117 | "class Supervisor(Employee):\n",
118 | "\n",
119 |     "    def __init__(self, employee_id, name, call_center):\n",
120 |     "        super(Supervisor, self).__init__(employee_id, name, Rank.SUPERVISOR, call_center)\n",
121 | "\n",
122 | " def escalate_call(self):\n",
123 | " self.call.level = Rank.DIRECTOR\n",
124 | " self._escalate_call()\n",
125 | "\n",
126 | "\n",
127 | "class Director(Employee):\n",
128 | "\n",
129 |     "    def __init__(self, employee_id, name, call_center):\n",
130 |     "        super(Director, self).__init__(employee_id, name, Rank.DIRECTOR, call_center)\n",
131 | "\n",
132 | " def escalate_call(self):\n",
133 |     "        raise NotImplementedError('Directors must be able to handle any call')\n",
134 | "\n",
135 | "\n",
136 | "class CallState(Enum):\n",
137 | "\n",
138 | " READY = 0\n",
139 | " IN_PROGRESS = 1\n",
140 | " COMPLETE = 2\n",
141 | "\n",
142 | "\n",
143 | "class Call(object):\n",
144 | "\n",
145 | " def __init__(self, rank):\n",
146 | " self.state = CallState.READY\n",
147 | " self.rank = rank\n",
148 | " self.employee = None\n",
149 | "\n",
150 | "\n",
151 | "class CallCenter(object):\n",
152 | "\n",
153 | " def __init__(self, operators, supervisors, directors):\n",
154 | " self.operators = operators\n",
155 | " self.supervisors = supervisors\n",
156 | " self.directors = directors\n",
157 | " self.queued_calls = deque()\n",
158 | "\n",
159 | " def dispatch_call(self, call):\n",
160 | " if call.rank not in (Rank.OPERATOR, Rank.SUPERVISOR, Rank.DIRECTOR):\n",
161 | " raise ValueError('Invalid call rank: {}'.format(call.rank))\n",
162 | " employee = None\n",
163 | " if call.rank == Rank.OPERATOR:\n",
164 | " employee = self._dispatch_call(call, self.operators)\n",
165 |     "        if employee is None and call.rank in (Rank.OPERATOR, Rank.SUPERVISOR):\n",
166 |     "            employee = self._dispatch_call(call, self.supervisors)\n",
167 |     "        if employee is None:\n",
168 |     "            employee = self._dispatch_call(call, self.directors)\n",
169 | " if employee is None:\n",
170 | " self.queued_calls.append(call)\n",
171 | "\n",
172 | " def _dispatch_call(self, call, employees):\n",
173 | " for employee in employees:\n",
174 | " if employee.call is None:\n",
175 | " employee.take_call(call)\n",
176 | " return employee\n",
177 | " return None\n",
178 | "\n",
179 | " def notify_call_escalated(self, call): # ...\n",
180 | " def notify_call_completed(self, call): # ...\n",
181 | " def dispatch_queued_call_to_newly_freed_employee(self, call, employee): # ..."
182 | ]
183 | }
184 | ],
185 | "metadata": {
186 | "kernelspec": {
187 | "display_name": "Python 3",
188 | "language": "python",
189 | "name": "python3"
190 | },
191 | "language_info": {
192 | "codemirror_mode": {
193 | "name": "ipython",
194 | "version": 3
195 | },
196 | "file_extension": ".py",
197 | "mimetype": "text/x-python",
198 | "name": "python",
199 | "nbconvert_exporter": "python",
200 | "pygments_lexer": "ipython3",
201 | "version": "3.4.3"
202 | }
203 | },
204 | "nbformat": 4,
205 | "nbformat_minor": 0
206 | }
207 |
--------------------------------------------------------------------------------
/solutions/object_oriented_design/call_center/call_center.py:
--------------------------------------------------------------------------------
1 | from abc import ABCMeta, abstractmethod
2 | from collections import deque
3 | from enum import Enum
4 |
5 |
6 | class Rank(Enum):
7 |
8 | OPERATOR = 0
9 | SUPERVISOR = 1
10 | DIRECTOR = 2
11 |
12 |
13 | class Employee(metaclass=ABCMeta):
14 |
15 | def __init__(self, employee_id, name, rank, call_center):
16 | self.employee_id = employee_id
17 | self.name = name
18 | self.rank = rank
19 | self.call = None
20 | self.call_center = call_center
21 |
22 | def take_call(self, call):
23 | """Assume the employee will always successfully take the call."""
24 | self.call = call
25 | self.call.employee = self
26 | self.call.state = CallState.IN_PROGRESS
27 |
28 | def complete_call(self):
29 | self.call.state = CallState.COMPLETE
30 | self.call_center.notify_call_completed(self.call)
31 |
32 | @abstractmethod
33 | def escalate_call(self):
34 | pass
35 |
36 | def _escalate_call(self):
37 | self.call.state = CallState.READY
38 | call = self.call
39 | self.call = None
40 | self.call_center.notify_call_escalated(call)
41 |
42 |
43 | class Operator(Employee):
44 |
45 |     def __init__(self, employee_id, name, call_center):
46 |         super(Operator, self).__init__(employee_id, name, Rank.OPERATOR, call_center)
47 |
48 | def escalate_call(self):
49 | self.call.level = Rank.SUPERVISOR
50 | self._escalate_call()
51 |
52 |
53 | class Supervisor(Employee):
54 |
55 |     def __init__(self, employee_id, name, call_center):
56 |         super(Supervisor, self).__init__(employee_id, name, Rank.SUPERVISOR, call_center)
57 |
58 | def escalate_call(self):
59 | self.call.level = Rank.DIRECTOR
60 | self._escalate_call()
61 |
62 |
63 | class Director(Employee):
64 |
65 |     def __init__(self, employee_id, name, call_center):
66 |         super(Director, self).__init__(employee_id, name, Rank.DIRECTOR, call_center)
67 |
68 | def escalate_call(self):
69 |         raise NotImplementedError('Directors must be able to handle any call')
70 |
71 |
72 | class CallState(Enum):
73 |
74 | READY = 0
75 | IN_PROGRESS = 1
76 | COMPLETE = 2
77 |
78 |
79 | class Call(object):
80 |
81 | def __init__(self, rank):
82 | self.state = CallState.READY
83 | self.rank = rank
84 | self.employee = None
85 |
86 |
87 | class CallCenter(object):
88 |
89 | def __init__(self, operators, supervisors, directors):
90 | self.operators = operators
91 | self.supervisors = supervisors
92 | self.directors = directors
93 | self.queued_calls = deque()
94 |
95 | def dispatch_call(self, call):
96 | if call.rank not in (Rank.OPERATOR, Rank.SUPERVISOR, Rank.DIRECTOR):
97 | raise ValueError('Invalid call rank: {}'.format(call.rank))
98 | employee = None
99 | if call.rank == Rank.OPERATOR:
100 | employee = self._dispatch_call(call, self.operators)
101 |         if employee is None and call.rank in (Rank.OPERATOR, Rank.SUPERVISOR):
102 |             employee = self._dispatch_call(call, self.supervisors)
103 |         if employee is None:
104 |             employee = self._dispatch_call(call, self.directors)
105 | if employee is None:
106 | self.queued_calls.append(call)
107 |
108 | def _dispatch_call(self, call, employees):
109 | for employee in employees:
110 | if employee.call is None:
111 | employee.take_call(call)
112 | return employee
113 | return None
114 |
115 | def notify_call_escalated(self, call): # ...
116 | def notify_call_completed(self, call): # ...
117 | def dispatch_queued_call_to_newly_freed_employee(self, call, employee): # ...
--------------------------------------------------------------------------------
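A quick usage sketch for `call_center.py` (hypothetical, not part of the repo). It assumes the constructors above, where each employee holds a reference to its `CallCenter`, and that the elided `notify_*` methods are filled in:

```python
# Hypothetical usage sketch; assumes the elided notification methods are implemented.
center = CallCenter(operators=[], supervisors=[], directors=[])
operator = Operator(employee_id=1, name='Alice', call_center=center)
center.operators.append(operator)

call = Call(Rank.OPERATOR)
center.dispatch_call(call)                  # Alice takes the call
assert call.state == CallState.IN_PROGRESS
assert call.employee is operator

operator.complete_call()                    # marks COMPLETE, notifies the center
```
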
/solutions/object_oriented_design/deck_of_cards/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mutablealligator/system-design-primer/0889f04f0d8162b6513437d3171a8ebb9ef5e504/solutions/object_oriented_design/deck_of_cards/__init__.py
--------------------------------------------------------------------------------
/solutions/object_oriented_design/deck_of_cards/deck_of_cards.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 |     "This notebook was prepared by [Donne Martin](https://github.com/donnemartin). Source and license info is on [GitHub](https://github.com/donnemartin/system-design-primer)."
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "# Design a deck of cards"
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "## Constraints and assumptions\n",
22 | "\n",
23 |     "* Is this a generic deck of cards for games like poker and blackjack?\n",
24 |     "  * Yes, design a generic deck then extend it to blackjack\n",
25 | "* Can we assume the deck has 52 cards (2-10, Jack, Queen, King, Ace) and 4 suits?\n",
26 | " * Yes\n",
27 | "* Can we assume inputs are valid or do we have to validate them?\n",
28 | " * Assume they're valid"
29 | ]
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {},
34 | "source": [
35 | "## Solution"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": 1,
41 | "metadata": {
42 | "collapsed": false
43 | },
44 | "outputs": [
45 | {
46 | "name": "stdout",
47 | "output_type": "stream",
48 | "text": [
49 | "Overwriting deck_of_cards.py\n"
50 | ]
51 | }
52 | ],
53 | "source": [
54 | "%%writefile deck_of_cards.py\n",
55 | "from abc import ABCMeta, abstractmethod\n",
56 | "from enum import Enum\n",
57 | "import sys\n",
58 | "\n",
59 | "\n",
60 | "class Suit(Enum):\n",
61 | "\n",
62 | " HEART = 0\n",
63 | " DIAMOND = 1\n",
64 | " CLUBS = 2\n",
65 | " SPADE = 3\n",
66 | "\n",
67 | "\n",
68 | "class Card(metaclass=ABCMeta):\n",
69 | "\n",
70 | " def __init__(self, value, suit):\n",
71 | " self.value = value\n",
72 | " self.suit = suit\n",
73 | " self.is_available = True\n",
74 | "\n",
75 | " @property\n",
76 | " @abstractmethod\n",
77 | " def value(self):\n",
78 | " pass\n",
79 | "\n",
80 | " @value.setter\n",
81 | " @abstractmethod\n",
82 | " def value(self, other):\n",
83 | " pass\n",
84 | "\n",
85 | "\n",
86 | "class BlackJackCard(Card):\n",
87 | "\n",
88 | " def __init__(self, value, suit):\n",
89 | " super(BlackJackCard, self).__init__(value, suit)\n",
90 | "\n",
91 | " def is_ace(self):\n",
92 |     "        return self._value == 1\n",
93 | "\n",
94 | " def is_face_card(self):\n",
95 | " \"\"\"Jack = 11, Queen = 12, King = 13\"\"\"\n",
96 |     "        return 10 < self._value <= 13\n",
97 | "\n",
98 | " @property\n",
99 | " def value(self):\n",
100 |     "        if self.is_ace():\n",
101 | " return 1\n",
102 | " elif self.is_face_card():\n",
103 | " return 10\n",
104 | " else:\n",
105 | " return self._value\n",
106 | "\n",
107 | " @value.setter\n",
108 | " def value(self, new_value):\n",
109 | " if 1 <= new_value <= 13:\n",
110 | " self._value = new_value\n",
111 | " else:\n",
112 | " raise ValueError('Invalid card value: {}'.format(new_value))\n",
113 | "\n",
114 | "\n",
115 | "class Hand(object):\n",
116 | "\n",
117 | " def __init__(self, cards):\n",
118 | " self.cards = cards\n",
119 | "\n",
120 | " def add_card(self, card):\n",
121 | " self.cards.append(card)\n",
122 | "\n",
123 | " def score(self):\n",
124 | " total_value = 0\n",
125 |     "        for card in self.cards:\n",
126 | " total_value += card.value\n",
127 | " return total_value\n",
128 | "\n",
129 | "\n",
130 | "class BlackJackHand(Hand):\n",
131 | "\n",
132 | " BLACKJACK = 21\n",
133 | "\n",
134 | " def __init__(self, cards):\n",
135 | " super(BlackJackHand, self).__init__(cards)\n",
136 | "\n",
137 | " def score(self):\n",
138 |     "        min_over = sys.maxsize\n",
139 |     "        max_under = -sys.maxsize\n",
140 | " for score in self.possible_scores():\n",
141 | " if self.BLACKJACK < score < min_over:\n",
142 | " min_over = score\n",
143 | " elif max_under < score <= self.BLACKJACK:\n",
144 | " max_under = score\n",
145 |     "        return max_under if max_under != -sys.maxsize else min_over\n",
146 | "\n",
147 | " def possible_scores(self):\n",
148 | " \"\"\"Return a list of possible scores, taking Aces into account.\"\"\"\n",
149 | " # ...\n",
150 | "\n",
151 | "\n",
152 | "class Deck(object):\n",
153 | "\n",
154 | " def __init__(self, cards):\n",
155 | " self.cards = cards\n",
156 | " self.deal_index = 0\n",
157 | "\n",
158 | " def remaining_cards(self):\n",
159 |     "        return len(self.cards) - self.deal_index\n",
160 | "\n",
161 |     "    def deal_card(self):\n",
162 | " try:\n",
163 | " card = self.cards[self.deal_index]\n",
164 | " card.is_available = False\n",
165 | " self.deal_index += 1\n",
166 | " except IndexError:\n",
167 | " return None\n",
168 | " return card\n",
169 | "\n",
170 | " def shuffle(self): # ..."
171 | ]
172 | }
173 | ],
174 | "metadata": {
175 | "kernelspec": {
176 | "display_name": "Python 3",
177 | "language": "python",
178 | "name": "python3"
179 | },
180 | "language_info": {
181 | "codemirror_mode": {
182 | "name": "ipython",
183 | "version": 3
184 | },
185 | "file_extension": ".py",
186 | "mimetype": "text/x-python",
187 | "name": "python",
188 | "nbconvert_exporter": "python",
189 | "pygments_lexer": "ipython3",
190 | "version": "3.4.3"
191 | }
192 | },
193 | "nbformat": 4,
194 | "nbformat_minor": 0
195 | }
196 |
--------------------------------------------------------------------------------
/solutions/object_oriented_design/deck_of_cards/deck_of_cards.py:
--------------------------------------------------------------------------------
1 | from abc import ABCMeta, abstractmethod
2 | from enum import Enum
3 | import sys
4 |
5 |
6 | class Suit(Enum):
7 |
8 | HEART = 0
9 | DIAMOND = 1
10 | CLUBS = 2
11 | SPADE = 3
12 |
13 |
14 | class Card(metaclass=ABCMeta):
15 |
16 | def __init__(self, value, suit):
17 | self.value = value
18 | self.suit = suit
19 | self.is_available = True
20 |
21 | @property
22 | @abstractmethod
23 | def value(self):
24 | pass
25 |
26 | @value.setter
27 | @abstractmethod
28 | def value(self, other):
29 | pass
30 |
31 |
32 | class BlackJackCard(Card):
33 |
34 | def __init__(self, value, suit):
35 | super(BlackJackCard, self).__init__(value, suit)
36 |
37 | def is_ace(self):
38 |         return self._value == 1
39 |
40 | def is_face_card(self):
41 | """Jack = 11, Queen = 12, King = 13"""
42 |         return 10 < self._value <= 13
43 |
44 | @property
45 | def value(self):
46 |         if self.is_ace():
47 | return 1
48 | elif self.is_face_card():
49 | return 10
50 | else:
51 | return self._value
52 |
53 | @value.setter
54 | def value(self, new_value):
55 | if 1 <= new_value <= 13:
56 | self._value = new_value
57 | else:
58 | raise ValueError('Invalid card value: {}'.format(new_value))
59 |
60 |
61 | class Hand(object):
62 |
63 | def __init__(self, cards):
64 | self.cards = cards
65 |
66 | def add_card(self, card):
67 | self.cards.append(card)
68 |
69 | def score(self):
70 | total_value = 0
71 |         for card in self.cards:
72 | total_value += card.value
73 | return total_value
74 |
75 |
76 | class BlackJackHand(Hand):
77 |
78 | BLACKJACK = 21
79 |
80 | def __init__(self, cards):
81 | super(BlackJackHand, self).__init__(cards)
82 |
83 | def score(self):
84 |         min_over = sys.maxsize
85 |         max_under = -sys.maxsize
86 | for score in self.possible_scores():
87 | if self.BLACKJACK < score < min_over:
88 | min_over = score
89 | elif max_under < score <= self.BLACKJACK:
90 | max_under = score
91 |         return max_under if max_under != -sys.maxsize else min_over
92 |
93 | def possible_scores(self):
94 | """Return a list of possible scores, taking Aces into account."""
95 | # ...
96 |
97 |
98 | class Deck(object):
99 |
100 | def __init__(self, cards):
101 | self.cards = cards
102 | self.deal_index = 0
103 |
104 | def remaining_cards(self):
105 |         return len(self.cards) - self.deal_index
106 |
107 |     def deal_card(self):
108 | try:
109 | card = self.cards[self.deal_index]
110 | card.is_available = False
111 | self.deal_index += 1
112 | except IndexError:
113 | return None
114 | return card
115 |
116 | def shuffle(self): # ...
--------------------------------------------------------------------------------
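A hypothetical usage sketch for `deck_of_cards.py` (not part of the repo), building the full 52-card deck assumed in the constraints:

```python
# Hypothetical usage sketch; values run 1 (Ace) through 13 (King) across four suits.
from itertools import product

cards = [BlackJackCard(value, suit)
         for value, suit in product(range(1, 14), Suit)]
deck = Deck(cards)

hand = BlackJackHand([deck.deal_card(), deck.deal_card()])
print(deck.remaining_cards())   # 50
```
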
/solutions/object_oriented_design/hash_table/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mutablealligator/system-design-primer/0889f04f0d8162b6513437d3171a8ebb9ef5e504/solutions/object_oriented_design/hash_table/__init__.py
--------------------------------------------------------------------------------
/solutions/object_oriented_design/hash_table/hash_map.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 |     "This notebook was prepared by [Donne Martin](https://github.com/donnemartin). Source and license info is on [GitHub](https://github.com/donnemartin/system-design-primer)."
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "# Design a hash map"
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "## Constraints and assumptions\n",
22 | "\n",
23 | "* For simplicity, are the keys integers only?\n",
24 | " * Yes\n",
25 | "* For collision resolution, can we use chaining?\n",
26 | " * Yes\n",
27 | "* Do we have to worry about load factors?\n",
28 | " * No\n",
29 | "* Can we assume inputs are valid or do we have to validate them?\n",
30 | " * Assume they're valid\n",
31 | "* Can we assume this fits memory?\n",
32 | " * Yes"
33 | ]
34 | },
35 | {
36 | "cell_type": "markdown",
37 | "metadata": {},
38 | "source": [
39 | "## Solution"
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": 1,
45 | "metadata": {
46 | "collapsed": false
47 | },
48 | "outputs": [
49 | {
50 | "name": "stdout",
51 | "output_type": "stream",
52 | "text": [
53 | "Overwriting hash_map.py\n"
54 | ]
55 | }
56 | ],
57 | "source": [
58 | "%%writefile hash_map.py\n",
59 | "class Item(object):\n",
60 | "\n",
61 | " def __init__(self, key, value):\n",
62 | " self.key = key\n",
63 | " self.value = value\n",
64 | "\n",
65 | "\n",
66 | "class HashTable(object):\n",
67 | "\n",
68 | " def __init__(self, size):\n",
69 | " self.size = size\n",
70 | " self.table = [[] for _ in range(self.size)]\n",
71 | "\n",
72 | " def _hash_function(self, key):\n",
73 | " return key % self.size\n",
74 | "\n",
75 | " def set(self, key, value):\n",
76 | " hash_index = self._hash_function(key)\n",
77 | " for item in self.table[hash_index]:\n",
78 | " if item.key == key:\n",
79 | " item.value = value\n",
80 | " return\n",
81 | " self.table[hash_index].append(Item(key, value))\n",
82 | "\n",
83 | " def get(self, key):\n",
84 | " hash_index = self._hash_function(key)\n",
85 | " for item in self.table[hash_index]:\n",
86 | " if item.key == key:\n",
87 | " return item.value\n",
88 | " raise KeyError('Key not found')\n",
89 | "\n",
90 | " def remove(self, key):\n",
91 | " hash_index = self._hash_function(key)\n",
92 | " for index, item in enumerate(self.table[hash_index]):\n",
93 | " if item.key == key:\n",
94 | " del self.table[hash_index][index]\n",
95 | " return\n",
96 | " raise KeyError('Key not found')"
97 | ]
98 | }
99 | ],
100 | "metadata": {
101 | "kernelspec": {
102 | "display_name": "Python 3",
103 | "language": "python",
104 | "name": "python3"
105 | },
106 | "language_info": {
107 | "codemirror_mode": {
108 | "name": "ipython",
109 | "version": 3
110 | },
111 | "file_extension": ".py",
112 | "mimetype": "text/x-python",
113 | "name": "python",
114 | "nbconvert_exporter": "python",
115 | "pygments_lexer": "ipython3",
116 | "version": "3.4.3"
117 | }
118 | },
119 | "nbformat": 4,
120 | "nbformat_minor": 0
121 | }
122 |
--------------------------------------------------------------------------------
/solutions/object_oriented_design/hash_table/hash_map.py:
--------------------------------------------------------------------------------
1 | class Item(object):
2 |
3 | def __init__(self, key, value):
4 | self.key = key
5 | self.value = value
6 |
7 |
8 | class HashTable(object):
9 |
10 | def __init__(self, size):
11 | self.size = size
12 | self.table = [[] for _ in range(self.size)]
13 |
14 | def _hash_function(self, key):
15 | return key % self.size
16 |
17 | def set(self, key, value):
18 | hash_index = self._hash_function(key)
19 | for item in self.table[hash_index]:
20 | if item.key == key:
21 | item.value = value
22 | return
23 | self.table[hash_index].append(Item(key, value))
24 |
25 | def get(self, key):
26 | hash_index = self._hash_function(key)
27 | for item in self.table[hash_index]:
28 | if item.key == key:
29 | return item.value
30 | raise KeyError('Key not found')
31 |
32 | def remove(self, key):
33 | hash_index = self._hash_function(key)
34 | for index, item in enumerate(self.table[hash_index]):
35 | if item.key == key:
36 | del self.table[hash_index][index]
37 | return
38 | raise KeyError('Key not found')
--------------------------------------------------------------------------------
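A usage example for `hash_map.py` (illustrative, not part of the repo) that exercises the chaining behavior: with `size=2`, keys `0` and `2` hash to the same bucket and coexist in its list:

```python
# Illustrative usage; runs against hash_map.py as written.
table = HashTable(size=2)
table.set(0, 'foo')
table.set(2, 'bar')            # 2 % 2 == 0 % 2, so this chains in the same bucket
assert table.get(0) == 'foo'
assert table.get(2) == 'bar'

table.set(0, 'baz')            # existing key: value updated in place
assert table.get(0) == 'baz'

table.remove(2)                # a subsequent get(2) raises KeyError
```
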
/solutions/object_oriented_design/lru_cache/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mutablealligator/system-design-primer/0889f04f0d8162b6513437d3171a8ebb9ef5e504/solutions/object_oriented_design/lru_cache/__init__.py
--------------------------------------------------------------------------------
/solutions/object_oriented_design/lru_cache/lru_cache.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 |     "This notebook was prepared by [Donne Martin](https://github.com/donnemartin). Source and license info is on [GitHub](https://github.com/donnemartin/system-design-primer)."
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "# Design an LRU cache"
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "## Constraints and assumptions\n",
22 | "\n",
23 | "* What are we caching?\n",
24 |     "  * We are caching the results of web queries\n",
25 | "* Can we assume inputs are valid or do we have to validate them?\n",
26 | " * Assume they're valid\n",
27 | "* Can we assume this fits memory?\n",
28 | " * Yes"
29 | ]
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {},
34 | "source": [
35 | "## Solution"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": 1,
41 | "metadata": {
42 | "collapsed": false
43 | },
44 | "outputs": [
45 | {
46 | "name": "stdout",
47 | "output_type": "stream",
48 | "text": [
49 | "Overwriting lru_cache.py\n"
50 | ]
51 | }
52 | ],
53 | "source": [
54 | "%%writefile lru_cache.py\n",
55 | "class Node(object):\n",
56 | "\n",
57 | " def __init__(self, results):\n",
58 | " self.results = results\n",
59 |     "        self.next = None\n",
60 | "\n",
61 | "\n",
62 | "class LinkedList(object):\n",
63 | "\n",
64 | " def __init__(self):\n",
65 | " self.head = None\n",
66 | " self.tail = None\n",
67 | "\n",
68 | " def move_to_front(self, node): # ...\n",
69 | " def append_to_front(self, node): # ...\n",
70 | " def remove_from_tail(self): # ...\n",
71 | "\n",
72 | "\n",
73 | "class Cache(object):\n",
74 | "\n",
75 | " def __init__(self, MAX_SIZE):\n",
76 | " self.MAX_SIZE = MAX_SIZE\n",
77 | " self.size = 0\n",
78 | " self.lookup = {} # key: query, value: node\n",
79 | " self.linked_list = LinkedList()\n",
80 | "\n",
81 |     "    def get(self, query):\n",
82 | " \"\"\"Get the stored query result from the cache.\n",
83 | " \n",
84 | " Accessing a node updates its position to the front of the LRU list.\n",
85 | " \"\"\"\n",
86 |     "        node = self.lookup.get(query)\n",
87 | " if node is None:\n",
88 | " return None\n",
89 | " self.linked_list.move_to_front(node)\n",
90 | " return node.results\n",
91 | "\n",
92 | " def set(self, results, query):\n",
93 | " \"\"\"Set the result for the given query key in the cache.\n",
94 | " \n",
95 | " When updating an entry, updates its position to the front of the LRU list.\n",
96 | " If the entry is new and the cache is at capacity, removes the oldest entry\n",
97 | " before the new entry is added.\n",
98 | " \"\"\"\n",
99 |     "        node = self.lookup.get(query)\n",
100 | " if node is not None:\n",
101 | " # Key exists in cache, update the value\n",
102 | " node.results = results\n",
103 | " self.linked_list.move_to_front(node)\n",
104 | " else:\n",
105 | " # Key does not exist in cache\n",
106 | " if self.size == self.MAX_SIZE:\n",
107 | " # Remove the oldest entry from the linked list and lookup\n",
108 | " self.lookup.pop(self.linked_list.tail.query, None)\n",
109 | " self.linked_list.remove_from_tail()\n",
110 | " else:\n",
111 | " self.size += 1\n",
112 | " # Add the new key and value\n",
113 | " new_node = Node(results)\n",
114 | " self.linked_list.append_to_front(new_node)\n",
115 | " self.lookup[query] = new_node"
116 | ]
117 | }
118 | ],
119 | "metadata": {
120 | "kernelspec": {
121 | "display_name": "Python 3",
122 | "language": "python",
123 | "name": "python3"
124 | },
125 | "language_info": {
126 | "codemirror_mode": {
127 | "name": "ipython",
128 | "version": 3
129 | },
130 | "file_extension": ".py",
131 | "mimetype": "text/x-python",
132 | "name": "python",
133 | "nbconvert_exporter": "python",
134 | "pygments_lexer": "ipython3",
135 | "version": "3.4.3"
136 | }
137 | },
138 | "nbformat": 4,
139 | "nbformat_minor": 0
140 | }
141 |
--------------------------------------------------------------------------------
/solutions/object_oriented_design/lru_cache/lru_cache.py:
--------------------------------------------------------------------------------
1 | class Node(object):
2 |
3 | def __init__(self, results):
4 | self.results = results
5 |         self.next = None
6 |
7 |
8 | class LinkedList(object):
9 |
10 | def __init__(self):
11 | self.head = None
12 | self.tail = None
13 |
14 | def move_to_front(self, node): # ...
15 | def append_to_front(self, node): # ...
16 | def remove_from_tail(self): # ...
17 |
18 |
19 | class Cache(object):
20 |
21 | def __init__(self, MAX_SIZE):
22 | self.MAX_SIZE = MAX_SIZE
23 | self.size = 0
24 | self.lookup = {} # key: query, value: node
25 | self.linked_list = LinkedList()
26 |
27 |     def get(self, query):
28 | """Get the stored query result from the cache.
29 |
30 | Accessing a node updates its position to the front of the LRU list.
31 | """
32 |         node = self.lookup.get(query)
33 | if node is None:
34 | return None
35 | self.linked_list.move_to_front(node)
36 | return node.results
37 |
38 | def set(self, results, query):
39 | """Set the result for the given query key in the cache.
40 |
41 | When updating an entry, updates its position to the front of the LRU list.
42 | If the entry is new and the cache is at capacity, removes the oldest entry
43 | before the new entry is added.
44 | """
45 |         node = self.lookup.get(query)
46 | if node is not None:
47 | # Key exists in cache, update the value
48 | node.results = results
49 | self.linked_list.move_to_front(node)
50 | else:
51 | # Key does not exist in cache
52 | if self.size == self.MAX_SIZE:
53 | # Remove the oldest entry from the linked list and lookup
54 | self.lookup.pop(self.linked_list.tail.query, None)
55 | self.linked_list.remove_from_tail()
56 | else:
57 | self.size += 1
58 | # Add the new key and value
59 | new_node = Node(results)
60 | self.linked_list.append_to_front(new_node)
61 | self.lookup[query] = new_node
--------------------------------------------------------------------------------
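The linked list operations are elided in the source. A minimal sketch of one O(1) approach, assuming a doubly linked list (an assumption, not the repo's implementation; it also presumes `Node` carries `prev` and `query` attributes, since `Cache.set` evicts via `self.linked_list.tail.query`):

```python
# Hypothetical sketch of the elided LinkedList operations (not the repo's code).
# Assumes Node has `prev`, `next`, and a `query` back-reference for eviction.
class LinkedList(object):

    def __init__(self):
        self.head = None
        self.tail = None

    def append_to_front(self, node):
        node.prev = None
        node.next = self.head
        if self.head is not None:
            self.head.prev = node
        self.head = node
        if self.tail is None:
            self.tail = node

    def move_to_front(self, node):
        if node is self.head:
            return
        # Unlink the node from its current position
        node.prev.next = node.next
        if node.next is not None:
            node.next.prev = node.prev
        else:
            self.tail = node.prev
        # Relink it at the head
        node.prev = None
        node.next = self.head
        self.head.prev = node
        self.head = node

    def remove_from_tail(self):
        if self.tail is None:
            return
        node = self.tail
        self.tail = node.prev
        if self.tail is not None:
            self.tail.next = None
        else:
            self.head = None
        node.prev = node.next = None
```
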
/solutions/object_oriented_design/online_chat/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mutablealligator/system-design-primer/0889f04f0d8162b6513437d3171a8ebb9ef5e504/solutions/object_oriented_design/online_chat/__init__.py
--------------------------------------------------------------------------------
/solutions/object_oriented_design/online_chat/online_chat.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 |     "This notebook was prepared by [Donne Martin](https://github.com/donnemartin). Source and license info is on [GitHub](https://github.com/donnemartin/system-design-primer)."
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "# Design an online chat"
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "## Constraints and assumptions\n",
22 | "\n",
23 | "* Assume we'll focus on the following workflows:\n",
24 | " * Text conversations only\n",
25 | " * Users\n",
26 | " * Add a user\n",
27 | " * Remove a user\n",
28 | " * Update a user\n",
29 | " * Add to a user's friends list\n",
30 | " * Add friend request\n",
31 | " * Approve friend request\n",
32 | " * Reject friend request\n",
33 | " * Remove from a user's friends list\n",
34 | " * Create a group chat\n",
35 | " * Invite friends to a group chat\n",
36 | " * Post a message to a group chat\n",
37 | " * Private 1-1 chat\n",
38 | " * Invite a friend to a private chat\n",
39 |     "    * Post a message to a private chat\n",
40 | "* No need to worry about scaling initially"
41 | ]
42 | },
43 | {
44 | "cell_type": "markdown",
45 | "metadata": {},
46 | "source": [
47 | "## Solution"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": 1,
53 | "metadata": {
54 | "collapsed": false
55 | },
56 | "outputs": [
57 | {
58 | "name": "stdout",
59 | "output_type": "stream",
60 | "text": [
61 | "Overwriting online_chat.py\n"
62 | ]
63 | }
64 | ],
65 | "source": [
66 | "%%writefile online_chat.py\n",
67 |     "from abc import ABCMeta\nfrom enum import Enum\n",
68 | "\n",
69 | "\n",
70 | "class UserService(object):\n",
71 | "\n",
72 | " def __init__(self):\n",
73 | " self.users_by_id = {} # key: user id, value: User\n",
74 | "\n",
75 | " def add_user(self, user_id, name, pass_hash): # ...\n",
76 | " def remove_user(self, user_id): # ...\n",
77 | " def add_friend_request(self, from_user_id, to_user_id): # ...\n",
78 | " def approve_friend_request(self, from_user_id, to_user_id): # ...\n",
79 | " def reject_friend_request(self, from_user_id, to_user_id): # ...\n",
80 | "\n",
81 | "\n",
82 | "class User(object):\n",
83 | "\n",
84 | " def __init__(self, user_id, name, pass_hash):\n",
85 | " self.user_id = user_id\n",
86 | " self.name = name\n",
87 | " self.pass_hash = pass_hash\n",
88 | " self.friends_by_id = {} # key: friend id, value: User\n",
89 | " self.friend_ids_to_private_chats = {} # key: friend id, value: private chats\n",
90 | " self.group_chats_by_id = {} # key: chat id, value: GroupChat\n",
91 | " self.received_friend_requests_by_friend_id = {} # key: friend id, value: AddRequest\n",
92 | " self.sent_friend_requests_by_friend_id = {} # key: friend id, value: AddRequest\n",
93 | "\n",
94 | " def message_user(self, friend_id, message): # ...\n",
95 | " def message_group(self, group_id, message): # ...\n",
96 | " def send_friend_request(self, friend_id): # ...\n",
97 | " def receive_friend_request(self, friend_id): # ...\n",
98 | " def approve_friend_request(self, friend_id): # ...\n",
99 | " def reject_friend_request(self, friend_id): # ...\n",
100 | "\n",
101 | "\n",
102 | "class Chat(metaclass=ABCMeta):\n",
103 | "\n",
104 | " def __init__(self, chat_id):\n",
105 | " self.chat_id = chat_id\n",
106 | " self.users = []\n",
107 | " self.messages = []\n",
108 | "\n",
109 | "\n",
110 | "class PrivateChat(Chat):\n",
111 | "\n",
112 |     "    def __init__(self, chat_id, first_user, second_user):\n",
113 |     "        super(PrivateChat, self).__init__(chat_id)\n",
114 | " self.users.append(first_user)\n",
115 | " self.users.append(second_user)\n",
116 | "\n",
117 | "\n",
118 | "class GroupChat(Chat):\n",
119 | "\n",
120 | " def add_user(self, user): # ...\n",
121 | " def remove_user(self, user): # ... \n",
122 | "\n",
123 | "\n",
124 | "class Message(object):\n",
125 | "\n",
126 | " def __init__(self, message_id, message, timestamp):\n",
127 | " self.message_id = message_id\n",
128 | " self.message = message\n",
129 | " self.timestamp = timestamp\n",
130 | "\n",
131 | "\n",
132 | "class AddRequest(object):\n",
133 | "\n",
134 | " def __init__(self, from_user_id, to_user_id, request_status, timestamp):\n",
135 | " self.from_user_id = from_user_id\n",
136 | " self.to_user_id = to_user_id\n",
137 | " self.request_status = request_status\n",
138 | " self.timestamp = timestamp\n",
139 | "\n",
140 | "\n",
141 | "class RequestStatus(Enum):\n",
142 | "\n",
143 | " UNREAD = 0\n",
144 | " READ = 1\n",
145 | " ACCEPTED = 2\n",
146 | " REJECTED = 3"
147 | ]
148 | }
149 | ],
150 | "metadata": {
151 | "kernelspec": {
152 | "display_name": "Python 3",
153 | "language": "python",
154 | "name": "python3"
155 | },
156 | "language_info": {
157 | "codemirror_mode": {
158 | "name": "ipython",
159 | "version": 3
160 | },
161 | "file_extension": ".py",
162 | "mimetype": "text/x-python",
163 | "name": "python",
164 | "nbconvert_exporter": "python",
165 | "pygments_lexer": "ipython3",
166 | "version": "3.4.3"
167 | }
168 | },
169 | "nbformat": 4,
170 | "nbformat_minor": 0
171 | }
172 |
--------------------------------------------------------------------------------
/solutions/object_oriented_design/online_chat/online_chat.py:
--------------------------------------------------------------------------------
1 | from abc import ABCMeta
2 | from enum import Enum
3 |
4 | class UserService(object):
5 |
6 | def __init__(self):
7 | self.users_by_id = {} # key: user id, value: User
8 |
9 | def add_user(self, user_id, name, pass_hash): # ...
10 | def remove_user(self, user_id): # ...
11 | def add_friend_request(self, from_user_id, to_user_id): # ...
12 | def approve_friend_request(self, from_user_id, to_user_id): # ...
13 | def reject_friend_request(self, from_user_id, to_user_id): # ...
14 |
15 |
16 | class User(object):
17 |
18 | def __init__(self, user_id, name, pass_hash):
19 | self.user_id = user_id
20 | self.name = name
21 | self.pass_hash = pass_hash
22 | self.friends_by_id = {} # key: friend id, value: User
23 | self.friend_ids_to_private_chats = {} # key: friend id, value: private chats
24 | self.group_chats_by_id = {} # key: chat id, value: GroupChat
25 | self.received_friend_requests_by_friend_id = {} # key: friend id, value: AddRequest
26 | self.sent_friend_requests_by_friend_id = {} # key: friend id, value: AddRequest
27 |
28 | def message_user(self, friend_id, message): # ...
29 | def message_group(self, group_id, message): # ...
30 | def send_friend_request(self, friend_id): # ...
31 | def receive_friend_request(self, friend_id): # ...
32 | def approve_friend_request(self, friend_id): # ...
33 | def reject_friend_request(self, friend_id): # ...
34 |
35 |
36 | class Chat(metaclass=ABCMeta):
37 |
38 | def __init__(self, chat_id):
39 | self.chat_id = chat_id
40 | self.users = []
41 | self.messages = []
42 |
43 |
44 | class PrivateChat(Chat):
45 |
46 |     def __init__(self, chat_id, first_user, second_user):
47 |         super(PrivateChat, self).__init__(chat_id)
48 | self.users.append(first_user)
49 | self.users.append(second_user)
50 |
51 |
52 | class GroupChat(Chat):
53 |
54 | def add_user(self, user): # ...
55 | def remove_user(self, user): # ...
56 |
57 |
58 | class Message(object):
59 |
60 | def __init__(self, message_id, message, timestamp):
61 | self.message_id = message_id
62 | self.message = message
63 | self.timestamp = timestamp
64 |
65 |
66 | class AddRequest(object):
67 |
68 | def __init__(self, from_user_id, to_user_id, request_status, timestamp):
69 | self.from_user_id = from_user_id
70 | self.to_user_id = to_user_id
71 | self.request_status = request_status
72 | self.timestamp = timestamp
73 |
74 |
75 | class RequestStatus(Enum):
76 |
77 | UNREAD = 0
78 | READ = 1
79 | ACCEPTED = 2
80 | REJECTED = 3
--------------------------------------------------------------------------------
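A hypothetical usage sketch for `online_chat.py` (not part of the repo), assuming the elided `UserService` methods are implemented and the `PrivateChat` constructor above that passes `chat_id` through:

```python
# Hypothetical usage sketch; assumes the elided service methods are filled in.
service = UserService()
service.add_user(user_id=1, name='Alice', pass_hash='a1b2')
service.add_user(user_id=2, name='Bob', pass_hash='c3d4')

service.add_friend_request(from_user_id=1, to_user_id=2)
service.approve_friend_request(from_user_id=1, to_user_id=2)

chat = PrivateChat(chat_id=100,
                   first_user=service.users_by_id[1],
                   second_user=service.users_by_id[2])
```
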
/solutions/object_oriented_design/parking_lot/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mutablealligator/system-design-primer/0889f04f0d8162b6513437d3171a8ebb9ef5e504/solutions/object_oriented_design/parking_lot/__init__.py
--------------------------------------------------------------------------------
/solutions/object_oriented_design/parking_lot/parking_lot.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 |     "This notebook was prepared by [Donne Martin](https://github.com/donnemartin). Source and license info is on [GitHub](https://github.com/donnemartin/system-design-primer)."
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "# Design a parking lot"
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "## Constraints and assumptions\n",
22 | "\n",
23 | "* What types of vehicles should we support?\n",
24 | " * Motorcycle, Car, Bus\n",
25 | "* Does each vehicle type take up a different amount of parking spots?\n",
26 | " * Yes\n",
27 | " * Motorcycle spot -> Motorcycle\n",
28 | " * Compact spot -> Motorcycle, Car\n",
29 | " * Large spot -> Motorcycle, Car\n",
30 | " * Bus can park if we have 5 consecutive \"large\" spots\n",
31 | "* Does the parking lot have multiple levels?\n",
32 | " * Yes"
33 | ]
34 | },
35 | {
36 | "cell_type": "markdown",
37 | "metadata": {},
38 | "source": [
39 | "## Solution"
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": 1,
45 | "metadata": {
46 | "collapsed": false
47 | },
48 | "outputs": [
49 | {
50 | "name": "stdout",
51 | "output_type": "stream",
52 | "text": [
53 | "Overwriting parking_lot.py\n"
54 | ]
55 | }
56 | ],
57 | "source": [
58 | "%%writefile parking_lot.py\n",
59 |     "from abc import ABCMeta, abstractmethod\nfrom enum import Enum\n",
60 | "\n",
61 | "\n",
62 | "class VehicleSize(Enum):\n",
63 | "\n",
64 | " MOTORCYCLE = 0\n",
65 | " COMPACT = 1\n",
66 | " LARGE = 2\n",
67 | "\n",
68 | "\n",
69 | "class Vehicle(metaclass=ABCMeta):\n",
70 | "\n",
71 | " def __init__(self, vehicle_size, license_plate, spot_size):\n",
72 | " self.vehicle_size = vehicle_size\n",
73 | " self.license_plate = license_plate\n",
74 |     "        self.spot_size = spot_size\n",
75 | " self.spots_taken = []\n",
76 | "\n",
77 | " def clear_spots(self):\n",
78 | " for spot in self.spots_taken:\n",
79 | " spot.remove_vehicle(self)\n",
80 | " self.spots_taken = []\n",
81 | "\n",
82 | " def take_spot(self, spot):\n",
83 | " self.spots_taken.append(spot)\n",
84 | "\n",
85 | " @abstractmethod\n",
86 | " def can_fit_in_spot(self, spot):\n",
87 | " pass\n",
88 | "\n",
89 | "\n",
90 | "class Motorcycle(Vehicle):\n",
91 | "\n",
92 | " def __init__(self, license_plate):\n",
93 | " super(Motorcycle, self).__init__(VehicleSize.MOTORCYCLE, license_plate, spot_size=1)\n",
94 | "\n",
95 | " def can_fit_in_spot(self, spot):\n",
96 | " return True\n",
97 | "\n",
98 | "\n",
99 | "class Car(Vehicle):\n",
100 | "\n",
101 | " def __init__(self, license_plate):\n",
102 | " super(Car, self).__init__(VehicleSize.COMPACT, license_plate, spot_size=1)\n",
103 | "\n",
104 | " def can_fit_in_spot(self, spot):\n",
105 |     "        return spot.spot_size in (VehicleSize.LARGE, VehicleSize.COMPACT)\n",
106 | "\n",
107 | "\n",
108 | "class Bus(Vehicle):\n",
109 | "\n",
110 | " def __init__(self, license_plate):\n",
111 | " super(Bus, self).__init__(VehicleSize.LARGE, license_plate, spot_size=5)\n",
112 | "\n",
113 | " def can_fit_in_spot(self, spot):\n",
114 |     "        return spot.spot_size == VehicleSize.LARGE\n",
115 | "\n",
116 | "\n",
117 | "class ParkingLot(object):\n",
118 | "\n",
119 | " def __init__(self, num_levels):\n",
120 | " self.num_levels = num_levels\n",
121 | " self.levels = []\n",
122 | "\n",
123 | " def park_vehicle(self, vehicle):\n",
124 |     "        for level in self.levels:\n",
125 | " if level.park_vehicle(vehicle):\n",
126 | " return True\n",
127 | " return False\n",
128 | "\n",
129 | "\n",
130 | "class Level(object):\n",
131 | "\n",
132 | " SPOTS_PER_ROW = 10\n",
133 | "\n",
134 | " def __init__(self, floor, total_spots):\n",
135 | " self.floor = floor\n",
136 | " self.num_spots = total_spots\n",
137 | " self.available_spots = 0\n",
138 | " self.parking_spots = []\n",
139 | "\n",
140 | " def spot_freed(self):\n",
141 | " self.available_spots += 1\n",
142 | "\n",
143 | " def park_vehicle(self, vehicle):\n",
144 | " spot = self._find_available_spot(vehicle)\n",
145 | " if spot is None:\n",
146 | " return None\n",
147 | " else:\n",
148 | " spot.park_vehicle(vehicle)\n",
149 | " return spot\n",
150 | "\n",
151 | " def _find_available_spot(self, vehicle):\n",
152 | " \"\"\"Find an available spot where vehicle can fit, or return None\"\"\"\n",
153 | " # ...\n",
154 | "\n",
155 | " def _park_starting_at_spot(self, spot, vehicle):\n",
156 | " \"\"\"Occupy starting at spot.spot_number to vehicle.spot_size.\"\"\"\n",
157 | " # ...\n",
158 | "\n",
159 | "\n",
160 | "class ParkingSpot(object):\n",
161 | "\n",
162 | " def __init__(self, level, row, spot_number, spot_size, vehicle_size):\n",
163 | " self.level = level\n",
164 | " self.row = row\n",
165 | " self.spot_number = spot_number\n",
166 | " self.spot_size = spot_size\n",
167 | " self.vehicle_size = vehicle_size\n",
168 | " self.vehicle = None\n",
169 | "\n",
170 | " def is_available(self):\n",
171 |     "        return self.vehicle is None\n",
172 | "\n",
173 | " def can_fit_vehicle(self, vehicle):\n",
174 | " if self.vehicle is not None:\n",
175 | " return False\n",
176 | " return vehicle.can_fit_in_spot(self)\n",
177 | "\n",
178 | " def park_vehicle(self, vehicle): # ...\n",
179 | " def remove_vehicle(self): # ..."
180 | ]
181 | }
182 | ],
183 | "metadata": {
184 | "kernelspec": {
185 | "display_name": "Python 3",
186 | "language": "python",
187 | "name": "python3"
188 | },
189 | "language_info": {
190 | "codemirror_mode": {
191 | "name": "ipython",
192 | "version": 3
193 | },
194 | "file_extension": ".py",
195 | "mimetype": "text/x-python",
196 | "name": "python",
197 | "nbconvert_exporter": "python",
198 | "pygments_lexer": "ipython3",
199 | "version": "3.4.3"
200 | }
201 | },
202 | "nbformat": 4,
203 | "nbformat_minor": 0
204 | }
205 |
--------------------------------------------------------------------------------
/solutions/object_oriented_design/parking_lot/parking_lot.py:
--------------------------------------------------------------------------------
1 | from abc import ABCMeta, abstractmethod
2 | from enum import Enum
3 |
4 | class VehicleSize(Enum):
5 |
6 | MOTORCYCLE = 0
7 | COMPACT = 1
8 | LARGE = 2
9 |
10 |
11 | class Vehicle(metaclass=ABCMeta):
12 |
13 | def __init__(self, vehicle_size, license_plate, spot_size):
14 | self.vehicle_size = vehicle_size
15 | self.license_plate = license_plate
16 |         self.spot_size = spot_size
17 | self.spots_taken = []
18 |
19 | def clear_spots(self):
20 | for spot in self.spots_taken:
21 | spot.remove_vehicle(self)
22 | self.spots_taken = []
23 |
24 | def take_spot(self, spot):
25 | self.spots_taken.append(spot)
26 |
27 | @abstractmethod
28 | def can_fit_in_spot(self, spot):
29 | pass
30 |
31 |
32 | class Motorcycle(Vehicle):
33 |
34 | def __init__(self, license_plate):
35 | super(Motorcycle, self).__init__(VehicleSize.MOTORCYCLE, license_plate, spot_size=1)
36 |
37 | def can_fit_in_spot(self, spot):
38 | return True
39 |
40 |
41 | class Car(Vehicle):
42 |
43 | def __init__(self, license_plate):
44 | super(Car, self).__init__(VehicleSize.COMPACT, license_plate, spot_size=1)
45 |
46 | def can_fit_in_spot(self, spot):
47 |         return spot.spot_size in (VehicleSize.LARGE, VehicleSize.COMPACT)
48 |
49 |
50 | class Bus(Vehicle):
51 |
52 | def __init__(self, license_plate):
53 | super(Bus, self).__init__(VehicleSize.LARGE, license_plate, spot_size=5)
54 |
55 | def can_fit_in_spot(self, spot):
56 |         return spot.spot_size == VehicleSize.LARGE
57 |
58 |
59 | class ParkingLot(object):
60 |
61 | def __init__(self, num_levels):
62 | self.num_levels = num_levels
63 | self.levels = [] # List of Levels
64 |
65 | def park_vehicle(self, vehicle):
66 |         for level in self.levels:
67 | if level.park_vehicle(vehicle):
68 | return True
69 | return False
70 |
71 |
72 | class Level(object):
73 |
74 | SPOTS_PER_ROW = 10
75 |
76 | def __init__(self, floor, total_spots):
77 | self.floor = floor
78 | self.num_spots = total_spots
79 | self.available_spots = 0
80 | self.spots = [] # List of ParkingSpots
81 |
82 | def spot_freed(self):
83 | self.available_spots += 1
84 |
85 | def park_vehicle(self, vehicle):
86 | spot = self._find_available_spot(vehicle)
87 | if spot is None:
88 | return None
89 | else:
90 | spot.park_vehicle(vehicle)
91 | return spot
92 |
93 | def _find_available_spot(self, vehicle):
94 | """Find an available spot where vehicle can fit, or return None"""
95 | # ...
96 |
97 | def _park_starting_at_spot(self, spot, vehicle):
98 | """Occupy starting at spot.spot_number to vehicle.spot_size."""
99 | # ...
100 |
101 |
102 | class ParkingSpot(object):
103 |
104 | def __init__(self, level, row, spot_number, spot_size, vehicle_size):
105 | self.level = level
106 | self.row = row
107 | self.spot_number = spot_number
108 | self.spot_size = spot_size
109 | self.vehicle_size = vehicle_size
110 | self.vehicle = None
111 |
112 | def is_available(self):
113 |         return self.vehicle is None
114 |
115 | def can_fit_vehicle(self, vehicle):
116 | if self.vehicle is not None:
117 | return False
118 | return vehicle.can_fit_in_spot(self)
119 |
120 | def park_vehicle(self, vehicle): # ...
121 | def remove_vehicle(self): # ...
--------------------------------------------------------------------------------
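The spot-search helper is elided in the source. One possible sketch (an assumption, not the repo's implementation) scans `self.spots` (as named in `parking_lot.py`) for `vehicle.spot_size` consecutive fitting spots within a row, matching the "bus needs 5 consecutive large spots" constraint:

```python
# Hypothetical sketch of the elided Level._find_available_spot (not the repo's code).
# Assumes self.spots is ordered by row, then spot_number.
def _find_available_spot(self, vehicle):
    """Return the first of vehicle.spot_size consecutive fitting spots, or None."""
    spots_found = 0
    last_row = None
    for index, spot in enumerate(self.spots):
        if last_row != spot.row:        # a run of spots must stay within one row
            spots_found = 0
            last_row = spot.row
        if spot.can_fit_vehicle(vehicle):
            spots_found += 1
            if spots_found == vehicle.spot_size:
                return self.spots[index - vehicle.spot_size + 1]
        else:
            spots_found = 0
    return None
```
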
/solutions/system_design/mint/README.md:
--------------------------------------------------------------------------------
1 | # Design Mint.com
2 |
3 | *Note: This document links directly to relevant areas found in the [system design topics](https://github.com/donnemartin/system-design-primer#index-of-system-design-topics) to avoid duplication. Refer to the linked content for general talking points, tradeoffs, and alternatives.*
4 |
5 | ## Step 1: Outline use cases and constraints
6 |
7 | > Gather requirements and scope the problem.
8 | > Ask questions to clarify use cases and constraints.
9 | > Discuss assumptions.
10 |
11 | Without an interviewer to address clarifying questions, we'll define some use cases and constraints.
12 |
13 | ### Use cases
14 |
15 | #### We'll scope the problem to handle only the following use cases
16 |
17 | * **User** connects to a financial account
18 | * **Service** extracts transactions from the account
19 | * Updates daily
20 | * Categorizes transactions
21 | * Allows manual category override by the user
22 | * No automatic re-categorization
23 | * Analyzes monthly spending, by category
24 | * **Service** recommends a budget
25 | * Allows users to manually set a budget
26 | * Sends notifications when approaching or exceeding budget
27 | * **Service** has high availability
28 |
29 | #### Out of scope
30 |
31 | * **Service** performs additional logging and analytics
32 |
33 | ### Constraints and assumptions
34 |
35 | #### State assumptions
36 |
37 | * Traffic is not evenly distributed
38 | * Automatic daily update of accounts applies only to users active in the past 30 days
39 | * Adding or removing financial accounts is relatively rare
40 | * Budget notifications don't need to be instant
41 | * 10 million users
42 | * 10 budget categories per user = 100 million budget items
43 | * Example categories:
44 | * Housing = $1,000
45 | * Food = $200
46 | * Gas = $100
47 | * Sellers are used to determine transaction category
48 | * 50,000 sellers
49 | * 30 million financial accounts
50 | * 5 billion transactions per month
51 | * 500 million read requests per month
52 | * 10:1 write to read ratio
53 |     * Write-heavy: users make transactions daily, but few visit the site daily
54 |
55 | #### Calculate usage
56 |
57 | **Clarify with your interviewer if you should run back-of-the-envelope usage calculations.**
58 |
59 | * Size per transaction:
60 | * `user_id` - 8 bytes
61 | * `created_at` - 5 bytes
62 | * `seller` - 32 bytes
63 | * `amount` - 5 bytes
64 | * Total: ~50 bytes
65 | * 250 GB of new transaction content per month
66 | * 50 bytes per transaction * 5 billion transactions per month
67 | * 9 TB of new transaction content in 3 years
68 | * Assume most are new transactions instead of updates to existing ones
69 | * 2,000 transactions per second on average
70 | * 200 read requests per second on average
71 |
72 | Handy conversion guide:
73 |
74 | * 2.5 million seconds per month
75 | * 1 request per second = 2.5 million requests per month
76 | * 40 requests per second = 100 million requests per month
77 | * 400 requests per second = 1 billion requests per month
78 |
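    | As a sanity check, the numbers above are easy to verify with a few lines of Python (a scratch calculation, not part of the design):
    |
    | ```
    | SECONDS_PER_MONTH = 2.5 * 10**6
    | transactions_per_month = 5 * 10**9
    | read_requests_per_month = 500 * 10**6
    | bytes_per_transaction = 8 + 5 + 32 + 5  # user_id, created_at, seller, amount
    |
    | print(bytes_per_transaction * transactions_per_month / 10**9)  # ~250 GB of new content per month
    | print(transactions_per_month / SECONDS_PER_MONTH)              # ~2,000 transactions per second
    | print(read_requests_per_month / SECONDS_PER_MONTH)             # ~200 read requests per second
    | ```
    |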
79 | ## Step 2: Create a high level design
80 |
81 | > Outline a high level design with all important components.
82 |
83 | 
84 |
85 | ## Step 3: Design core components
86 |
87 | > Dive into details for each core component.
88 |
89 | ### Use case: User connects to a financial account
90 |
91 | We could store info on the 10 million users in a [relational database](https://github.com/donnemartin/system-design-primer#relational-database-management-system-rdbms). We should discuss the [use cases and tradeoffs between choosing SQL or NoSQL](https://github.com/donnemartin/system-design-primer#sql-or-nosql).
92 |
93 | * The **Client** sends a request to the **Web Server**, running as a [reverse proxy](https://github.com/donnemartin/system-design-primer#reverse-proxy-web-server)
94 | * The **Web Server** forwards the request to the **Accounts API** server
95 | * The **Accounts API** server updates the **SQL Database** `accounts` table with the newly entered account info
96 |
97 | **Clarify with your interviewer how much code you are expected to write**.
98 |
99 | The `accounts` table could have the following structure:
100 |
101 | ```
102 | id int NOT NULL AUTO_INCREMENT
103 | created_at datetime NOT NULL
104 | last_update datetime NOT NULL
105 | account_url varchar(255) NOT NULL
106 | account_login varchar(32) NOT NULL
107 | account_password_hash char(64) NOT NULL
108 | user_id int NOT NULL
109 | PRIMARY KEY(id)
110 | FOREIGN KEY(user_id) REFERENCES users(id)
111 | ```
112 |
113 | We'll create an [index](https://github.com/donnemartin/system-design-primer#use-good-indices) on `id`, `user_id`, and `created_at` to speed up lookups (log-time instead of scanning the entire table) and to keep the data in memory. Reading 1 MB sequentially from memory takes about 250 microseconds, while reading from SSD takes 4x and from disk takes 80x longer.
114 |
115 | We'll use a public [**REST API**](https://github.com/donnemartin/system-design-primer#representational-state-transfer-rest):
116 |
117 | ```
118 | $ curl -X POST --data '{ "user_id": "foo", "account_url": "bar", \
119 | "account_login": "baz", "account_password": "qux" }' \
120 | https://mint.com/api/v1/account
121 | ```
122 |
123 | For internal communications, we could use [Remote Procedure Calls](https://github.com/donnemartin/system-design-primer#remote-procedure-call-rpc).
124 |
125 | Next, the service extracts transactions from the account.
126 |
127 | ### Use case: Service extracts transactions from the account
128 |
129 | We'll want to extract information from an account in these cases:
130 |
131 | * The user first links the account
132 | * The user manually refreshes the account
133 | * Automatically each day for users who have been active in the past 30 days
134 |
135 | Data flow:
136 |
137 | * The **Client** sends a request to the **Web Server**
138 | * The **Web Server** forwards the request to the **Accounts API** server
139 | * The **Accounts API** server places a job on a **Queue** such as Amazon SQS or [RabbitMQ](https://www.rabbitmq.com/)
140 |     * Extracting transactions could take a while; we'd probably want to do this [asynchronously with a queue](https://github.com/donnemartin/system-design-primer#asynchronism), although this introduces additional complexity (see the producer sketch after this list)
141 | * The **Transaction Extraction Service** does the following:
142 | * Pulls from the **Queue** and extracts transactions for the given account from the financial institution, storing the results as raw log files in the **Object Store**
143 | * Uses the **Category Service** to categorize each transaction
144 | * Uses the **Budget Service** to calculate aggregate monthly spending by category
145 | * The **Budget Service** uses the **Notification Service** to let users know if they are nearing or have exceeded their budget
146 | * Updates the **SQL Database** `transactions` table with categorized transactions
147 | * Updates the **SQL Database** `monthly_spending` table with aggregate monthly spending by category
148 | * Notifies the user the transactions have completed through the **Notification Service**:
149 | * Uses a **Queue** (not pictured) to asynchronously send out notifications
150 |
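    | A minimal sketch of the producer side of the queue interaction (`queue.Queue` stands in for an SQS or RabbitMQ client, and the payload fields are illustrative, not a specific client's API):
    |
    | ```
    | import json
    | import queue
    |
    |
    | def enqueue_extraction_job(q, user_id, account_id):
    |     """Place an extraction job on the queue for the Transaction Extraction Service."""
    |     q.put(json.dumps({'user_id': user_id, 'account_id': account_id}))
    |
    |
    | jobs = queue.Queue()
    | enqueue_extraction_job(jobs, user_id='foo', account_id='bar')
    | ```
    |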
151 | The `transactions` table could have the following structure:
152 |
153 | ```
154 | id int NOT NULL AUTO_INCREMENT
155 | created_at datetime NOT NULL
156 | seller varchar(32) NOT NULL
157 | amount decimal NOT NULL
158 | user_id int NOT NULL
159 | PRIMARY KEY(id)
160 | FOREIGN KEY(user_id) REFERENCES users(id)
161 | ```
162 |
163 | We'll create an [index](https://github.com/donnemartin/system-design-primer#use-good-indices) on `id`, `user_id`, and `created_at`.
164 |
165 | The `monthly_spending` table could have the following structure:
166 |
167 | ```
168 | id int NOT NULL AUTO_INCREMENT
169 | month_year date NOT NULL
170 | category varchar(32)
171 | amount decimal NOT NULL
172 | user_id int NOT NULL
173 | PRIMARY KEY(id)
174 | FOREIGN KEY(user_id) REFERENCES users(id)
175 | ```
176 |
177 | We'll create an [index](https://github.com/donnemartin/system-design-primer#use-good-indices) on `id` and `user_id`.
178 |
179 | #### Category service
180 |
181 | For the **Category Service**, we can seed a seller-to-category dictionary with the most popular sellers. With an estimated 50,000 sellers and less than 255 bytes per entry, the dictionary would take only about 12 MB of memory.
182 |
183 | **Clarify with your interviewer how much code you are expected to write**.
184 |
185 | ```
186 | from enum import Enum
    |
    |
    | class DefaultCategories(Enum):
187 |
188 | HOUSING = 0
189 | FOOD = 1
190 | GAS = 2
191 | SHOPPING = 3
192 | ...
193 |
194 | seller_category_map = {}
195 | seller_category_map['Exxon'] = DefaultCategories.GAS
196 | seller_category_map['Target'] = DefaultCategories.SHOPPING
197 | ...
198 | ```
199 |
200 | For sellers not initially seeded in the map, we could use a crowdsourcing effort by evaluating the manual category overrides our users provide. We could use a min-heap to quickly look up the top manual override per seller in O(1) time, as sketched after the following snippet.
201 |
202 | ```
203 | class Categorizer(object):
204 |
205 |     def __init__(self, seller_category_map, seller_category_crowd_overrides_map):
206 | self.seller_category_map = seller_category_map
207 | self.seller_category_crowd_overrides_map = \
208 | seller_category_crowd_overrides_map
209 |
210 | def categorize(self, transaction):
211 | if transaction.seller in self.seller_category_map:
212 | return self.seller_category_map[transaction.seller]
213 | elif transaction.seller in self.seller_category_crowd_overrides_map:
214 | self.seller_category_map[transaction.seller] = \
215 | self.seller_category_crowd_overrides_map[transaction.seller].peek_min()
216 | return self.seller_category_map[transaction.seller]
217 | return None
218 | ```
219 |
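    | The `peek_min()` call above assumes each value in `seller_category_crowd_overrides_map` is a min-heap keyed by override rank. A minimal sketch with `heapq`, reusing `DefaultCategories` from above (the `MinHeap` wrapper and rank values are illustrative, not part of the solution code):
    |
    | ```
    | import heapq
    |
    |
    | class MinHeap(object):
    |
    |     def __init__(self):
    |         self.heap = []
    |
    |     def push(self, rank, category):
    |         heapq.heappush(self.heap, (rank, category))
    |
    |     def peek_min(self):
    |         return self.heap[0][1]  # Root of the heap, an O(1) read
    |
    |
    | seller_category_crowd_overrides_map = {'NewSeller': MinHeap()}
    | seller_category_crowd_overrides_map['NewSeller'].push(1, DefaultCategories.SHOPPING)
    | ```
    |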
220 | Transaction implementation:
221 |
222 | ```
223 | class Transaction(object):
224 |
225 |     def __init__(self, created_at, seller, amount):
226 |         self.created_at = created_at
227 | self.seller = seller
228 | self.amount = amount
229 | ```
230 |
231 | ### Use case: Service recommends a budget
232 |
233 | To start, we could use a generic budget template that allocates category amounts based on income tiers. Using this approach, we would not have to store the 100 million budget items identified in the constraints, only those that the user overrides. If a user overrides a budget category, we could store the override in a `budget_overrides` table.
234 |
235 | ```
236 | class Budget(object):
237 |
238 | def __init__(self, income):
239 | self.income = income
240 | self.categories_to_budget_map = self.create_budget_template()
241 |
242 | def create_budget_template(self):
243 |         return {
244 |             DefaultCategories.HOUSING: self.income * .4,
245 |             DefaultCategories.FOOD: self.income * .2,
246 |             DefaultCategories.GAS: self.income * .1,
247 |             DefaultCategories.SHOPPING: self.income * .2,
248 |             # ...
249 |         }
250 |
251 | def override_category_budget(self, category, amount):
252 | self.categories_to_budget_map[category] = amount
253 | ```
254 |
255 | For the **Budget Service**, we can potentially run SQL queries on the `transactions` table to generate the `monthly_spending` aggregate table. The `monthly_spending` table would likely have far fewer rows than the total 5 billion transactions, since users typically have many transactions per month.
256 |
257 | As an alternative, we can run **MapReduce** jobs on the raw transaction files to:
258 |
259 | * Categorize each transaction
260 | * Generate aggregate monthly spending by category
261 |
262 | Running analyses on the transaction files could significantly reduce the load on the database.
263 |
264 | We could call the **Budget Service** to re-run the analysis if the user updates a category.
265 |
266 | **Clarify with your interviewer how much code you are expected to write**.
267 |
268 | Sample log file format, tab delimited:
269 |
270 | ```
271 | user_id timestamp seller amount
272 | ```
273 |
274 | **MapReduce** implementation:
275 |
276 | ```
277 | from mrjob.job import MRJob
    |
    |
    | class SpendingByCategory(MRJob):
278 |
279 | def __init__(self, categorizer):
280 | self.categorizer = categorizer
281 |         self.current_year_month = self.calc_current_year_month()
282 | ...
283 |
284 | def calc_current_year_month(self):
285 | """Return the current year and month."""
286 | ...
287 |
288 | def extract_year_month(self, timestamp):
289 | """Return the year and month portions of the timestamp."""
290 | ...
291 |
292 | def handle_budget_notifications(self, key, total):
293 | """Call notification API if nearing or exceeded budget."""
294 | ...
295 |
296 | def mapper(self, _, line):
297 | """Parse each log line, extract and transform relevant lines.
298 |
299 | Argument line will be of the form:
300 |
301 | user_id timestamp seller amount
302 |
303 | Using the categorizer to convert seller to category,
304 | emit key value pairs of the form:
305 |
306 | (user_id, 2016-01, shopping), 25
307 | (user_id, 2016-01, shopping), 100
308 | (user_id, 2016-01, gas), 50
309 | """
310 | user_id, timestamp, seller, amount = line.split('\t')
311 | category = self.categorizer.categorize(seller)
312 | period = self.extract_year_month(timestamp)
313 | if period == self.current_year_month:
314 |             yield (user_id, period, category), float(amount)
315 |
316 |     def reducer(self, key, values):
317 | """Sum values for each key.
318 |
319 | (user_id, 2016-01, shopping), 125
320 | (user_id, 2016-01, gas), 50
321 | """
322 |         total = sum(values)
    |         self.handle_budget_notifications(key, total)
323 |         yield key, total
324 | ```
325 |
326 | ## Step 4: Scale the design
327 |
328 | > Identify and address bottlenecks, given the constraints.
329 |
330 | 
331 |
332 | **Important: Do not simply jump right into the final design from the initial design!**
333 |
334 | State you would do this iteratively: 1) **Benchmark/Load Test**, 2) **Profile** for bottlenecks, 3) address bottlenecks while evaluating alternatives and trade-offs, and 4) repeat. See [Design a system that scales to millions of users on AWS](https://github.com/donnemartin/system-design-primer/blob/master/solutions/system_design/scaling_aws/README.md) as a sample on how to iteratively scale the initial design.
335 |
336 | It's important to discuss what bottlenecks you might encounter with the initial design and how you might address each of them. For example, what issues are addressed by adding a **Load Balancer** with multiple **Web Servers**? **CDN**? **Master-Slave Replicas**? What are the alternatives and **Trade-Offs** for each?
337 |
338 | We'll introduce some components to complete the design and to address scalability issues. Internal load balancers are not shown to reduce clutter.
339 |
340 | *To avoid repeating discussions*, refer to the following [system design topics](https://github.com/donnemartin/system-design-primer#index-of-system-design-topics) for main talking points, tradeoffs, and alternatives:
341 |
342 | * [DNS](https://github.com/donnemartin/system-design-primer#domain-name-system)
343 | * [CDN](https://github.com/donnemartin/system-design-primer#content-delivery-network)
344 | * [Load balancer](https://github.com/donnemartin/system-design-primer#load-balancer)
345 | * [Horizontal scaling](https://github.com/donnemartin/system-design-primer#horizontal-scaling)
346 | * [Web server (reverse proxy)](https://github.com/donnemartin/system-design-primer#reverse-proxy-web-server)
347 | * [API server (application layer)](https://github.com/donnemartin/system-design-primer#application-layer)
348 | * [Cache](https://github.com/donnemartin/system-design-primer#cache)
349 | * [Relational database management system (RDBMS)](https://github.com/donnemartin/system-design-primer#relational-database-management-system-rdbms)
350 | * [SQL write master-slave failover](https://github.com/donnemartin/system-design-primer#fail-over)
351 | * [Master-slave replication](https://github.com/donnemartin/system-design-primer#master-slave-replication)
352 | * [Asynchronism](https://github.com/donnemartin/system-design-primer#asynchronism)
353 | * [Consistency patterns](https://github.com/donnemartin/system-design-primer#consistency-patterns)
354 | * [Availability patterns](https://github.com/donnemartin/system-design-primer#availability-patterns)
355 |
356 | We'll add an additional use case: **User** accesses summaries and transactions.
357 |
358 | User sessions, aggregate stats by category, and recent transactions could be placed in a **Memory Cache** such as Redis or Memcached.
359 |
360 | * The **Client** sends a read request to the **Web Server**
361 | * The **Web Server** forwards the request to the **Read API** server
362 | * Static content can be served from the **Object Store** such as S3, which is cached on the **CDN**
363 | * The **Read API** server does the following:
364 | * Checks the **Memory Cache** for the content
365 | * If the url is in the **Memory Cache**, returns the cached contents
366 | * Else
367 | * If the url is in the **SQL Database**, fetches the contents
368 | * Updates the **Memory Cache** with the contents
369 |
370 | Refer to [When to update the cache](https://github.com/donnemartin/system-design-primer#when-to-update-the-cache) for tradeoffs and alternatives. The approach above describes [cache-aside](https://github.com/donnemartin/system-design-primer#cache-aside).
371 |
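    | A minimal sketch of the cache-aside read path (the `DictCache` stand-in and `db_read` callable are illustrative; a production deployment would use a Redis or Memcached client):
    |
    | ```
    | class DictCache(dict):
    |     """Stand-in for a Redis/Memcached client exposing get/set."""
    |
    |     def set(self, key, value):
    |         self[key] = value
    |
    |
    | def get_monthly_spending(user_id, cache, db_read):
    |     """Cache-aside: check the cache first, fall back to the SQL read replicas."""
    |     key = 'monthly_spending:{0}'.format(user_id)
    |     results = cache.get(key)
    |     if results is None:
    |         results = db_read(user_id)  # Cache miss: query the database
    |         cache.set(key, results)     # Populate the cache for next time
    |     return results
    | ```
    |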
372 | Instead of keeping the `monthly_spending` aggregate table in the **SQL Database**, we could create a separate **Analytics Database** using a data warehousing solution such as Amazon Redshift or Google BigQuery.
373 |
374 | We might only want to store a month of `transactions` data in the database, while storing the rest in a data warehouse or in an **Object Store**. An **Object Store** such as Amazon S3 can comfortably handle the constraint of 250 GB of new content per month.
375 |
376 | To address the 200 *average* read requests per second (higher at peak), traffic for popular content should be handled by the **Memory Cache** instead of the database. The **Memory Cache** is also useful for handling the unevenly distributed traffic and traffic spikes. The **SQL Read Replicas** should be able to handle the cache misses, as long as the replicas are not bogged down with replicating writes.
377 |
378 | 2,000 *average* transaction writes per second (higher at peak) might be tough for a single **SQL Write Master-Slave**. We might need to employ additional SQL scaling patterns:
379 |
380 | * [Federation](https://github.com/donnemartin/system-design-primer#federation)
381 | * [Sharding](https://github.com/donnemartin/system-design-primer#sharding)
382 | * [Denormalization](https://github.com/donnemartin/system-design-primer#denormalization)
383 | * [SQL Tuning](https://github.com/donnemartin/system-design-primer#sql-tuning)
384 |
385 | We should also consider moving some data to a **NoSQL Database**.
386 |
387 | ## Additional talking points
388 |
389 | > Additional topics to dive into, depending on the problem scope and time remaining.
390 |
391 | ### NoSQL
392 |
393 | * [Key-value store](https://github.com/donnemartin/system-design-primer#key-value-store)
394 | * [Document store](https://github.com/donnemartin/system-design-primer#document-store)
395 | * [Wide column store](https://github.com/donnemartin/system-design-primer#wide-column-store)
396 | * [Graph database](https://github.com/donnemartin/system-design-primer#graph-database)
397 | * [SQL vs NoSQL](https://github.com/donnemartin/system-design-primer#sql-or-nosql)
398 |
399 | ### Caching
400 |
401 | * Where to cache
402 | * [Client caching](https://github.com/donnemartin/system-design-primer#client-caching)
403 | * [CDN caching](https://github.com/donnemartin/system-design-primer#cdn-caching)
404 | * [Web server caching](https://github.com/donnemartin/system-design-primer#web-server-caching)
405 | * [Database caching](https://github.com/donnemartin/system-design-primer#database-caching)
406 | * [Application caching](https://github.com/donnemartin/system-design-primer#application-caching)
407 | * What to cache
408 | * [Caching at the database query level](https://github.com/donnemartin/system-design-primer#caching-at-the-database-query-level)
409 | * [Caching at the object level](https://github.com/donnemartin/system-design-primer#caching-at-the-object-level)
410 | * When to update the cache
411 | * [Cache-aside](https://github.com/donnemartin/system-design-primer#cache-aside)
412 | * [Write-through](https://github.com/donnemartin/system-design-primer#write-through)
413 | * [Write-behind (write-back)](https://github.com/donnemartin/system-design-primer#write-behind-write-back)
414 | * [Refresh ahead](https://github.com/donnemartin/system-design-primer#refresh-ahead)
415 |
416 | ### Asynchronism and microservices
417 |
418 | * [Message queues](https://github.com/donnemartin/system-design-primer#message-queues)
419 | * [Task queues](https://github.com/donnemartin/system-design-primer#task-queues)
420 | * [Back pressure](https://github.com/donnemartin/system-design-primer#back-pressure)
421 | * [Microservices](https://github.com/donnemartin/system-design-primer#microservices)
422 |
423 | ### Communications
424 |
425 | * Discuss tradeoffs:
426 | * External communication with clients - [HTTP APIs following REST](https://github.com/donnemartin/system-design-primer#representational-state-transfer-rest)
427 | * Internal communications - [RPC](https://github.com/donnemartin/system-design-primer#remote-procedure-call-rpc)
428 | * [Service discovery](https://github.com/donnemartin/system-design-primer#service-discovery)
429 |
430 | ### Security
431 |
432 | Refer to the [security section](https://github.com/donnemartin/system-design-primer#security).
433 |
434 | ### Latency numbers
435 |
436 | See [Latency numbers every programmer should know](https://github.com/donnemartin/system-design-primer#latency-numbers-every-programmer-should-know).
437 |
438 | ### Ongoing
439 |
440 | * Continue benchmarking and monitoring your system to address bottlenecks as they come up
441 | * Scaling is an iterative process
442 |
--------------------------------------------------------------------------------
/solutions/system_design/mint/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mutablealligator/system-design-primer/0889f04f0d8162b6513437d3171a8ebb9ef5e504/solutions/system_design/mint/__init__.py
--------------------------------------------------------------------------------
/solutions/system_design/mint/mint.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mutablealligator/system-design-primer/0889f04f0d8162b6513437d3171a8ebb9ef5e504/solutions/system_design/mint/mint.png
--------------------------------------------------------------------------------
/solutions/system_design/mint/mint_basic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mutablealligator/system-design-primer/0889f04f0d8162b6513437d3171a8ebb9ef5e504/solutions/system_design/mint/mint_basic.png
--------------------------------------------------------------------------------
/solutions/system_design/mint/mint_mapreduce.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from mrjob.job import MRJob
4 |
5 |
6 | class SpendingByCategory(MRJob):
7 |
8 | def __init__(self, categorizer):
9 | self.categorizer = categorizer
10 | ...
11 |
12 | def current_year_month(self):
13 | """Return the current year and month."""
14 | ...
15 |
16 | def extract_year_month(self, timestamp):
17 | """Return the year and month portions of the timestamp."""
18 | ...
19 |
20 | def handle_budget_notifications(self, key, total):
21 | """Call notification API if nearing or exceeded budget."""
22 | ...
23 |
24 | def mapper(self, _, line):
25 | """Parse each log line, extract and transform relevant lines.
26 |
27 | Emit key value pairs of the form:
28 |
29 | (2016-01, shopping), 25
30 | (2016-01, shopping), 100
31 | (2016-01, gas), 50
32 | """
33 |         timestamp, seller, amount = line.split('\t')
   |         category = self.categorizer.categorize(seller)
34 |         period = self.extract_year_month(timestamp)
35 |         if period == self.current_year_month():
36 |             yield (period, category), float(amount)
37 |
38 |     def reducer(self, key, values):
39 | """Sum values for each key.
40 |
41 | (2016-01, shopping), 125
42 | (2016-01, gas), 50
43 | """
44 | total = sum(values)
45 | self.handle_budget_notifications(key, total)
46 |         yield key, total
47 |
48 | def steps(self):
49 | """Run the map and reduce steps."""
50 | return [
51 | self.mr(mapper=self.mapper,
52 | reducer=self.reducer)
53 | ]
54 |
55 |
56 | if __name__ == '__main__':
57 | SpendingByCategory.run()
58 |
--------------------------------------------------------------------------------
/solutions/system_design/mint/mint_snippets.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from enum import Enum
  |
  |
  | class DefaultCategories(Enum):
4 |
5 | HOUSING = 0
6 | FOOD = 1
7 | GAS = 2
8 | SHOPPING = 3
9 | ...
10 |
11 | seller_category_map = {}
12 | seller_category_map['Exxon'] = DefaultCategories.GAS
13 | seller_category_map['Target'] = DefaultCategories.SHOPPING
14 |
15 |
16 | class Categorizer(object):
17 |
18 | def __init__(self, seller_category_map, seller_category_overrides_map):
19 | self.seller_category_map = seller_category_map
20 | self.seller_category_overrides_map = seller_category_overrides_map
21 |
22 | def categorize(self, transaction):
23 | if transaction.seller in self.seller_category_map:
24 | return self.seller_category_map[transaction.seller]
25 | if transaction.seller in self.seller_category_overrides_map:
26 |             self.seller_category_map[transaction.seller] = \
27 |                 self.seller_category_overrides_map[transaction.seller].peek_min()
28 | return self.seller_category_map[transaction.seller]
29 | return None
30 |
31 |
32 | class Transaction(object):
33 |
34 | def __init__(self, timestamp, seller, amount):
35 | self.timestamp = timestamp
36 | self.seller = seller
37 | self.amount = amount
38 |
39 |
40 | class Budget(object):
41 |
42 | def __init__(self, template_categories_to_budget_map):
43 | self.categories_to_budget_map = template_categories_to_budget_map
44 |
45 | def override_category_budget(self, category, amount):
46 | self.categories_to_budget_map[category] = amount
47 |
48 |
--------------------------------------------------------------------------------
/solutions/system_design/pastebin/README.md:
--------------------------------------------------------------------------------
1 | # Design Pastebin.com (or Bit.ly)
2 |
3 | *Note: This document links directly to relevant areas found in the [system design topics](https://github.com/donnemartin/system-design-primer#index-of-system-design-topics) to avoid duplication. Refer to the linked content for general talking points, tradeoffs, and alternatives.*
4 |
5 | **Design Bit.ly** is a similar question, except pastebin requires storing the paste contents instead of the original unshortened url.
6 |
7 | ## Step 1: Outline use cases and constraints
8 |
9 | > Gather requirements and scope the problem.
10 | > Ask questions to clarify use cases and constraints.
11 | > Discuss assumptions.
12 |
13 | Without an interviewer to address clarifying questions, we'll define some use cases and constraints.
14 |
15 | ### Use cases
16 |
17 | #### We'll scope the problem to handle only the following use cases
18 |
19 | * **User** enters a block of text and gets a randomly generated link
20 | * Expiration
21 | * Default setting does not expire
22 | * Can optionally set a timed expiration
23 | * **User** enters a paste's url and views the contents
24 | * **User** is anonymous
25 | * **Service** tracks analytics of pages
26 | * Monthly visit stats
27 | * **Service** deletes expired pastes
28 | * **Service** has high availability
29 |
30 | #### Out of scope
31 |
32 | * **User** registers for an account
33 | * **User** verifies email
34 | * **User** logs into a registered account
35 | * **User** edits the document
36 | * **User** can set visibility
37 | * **User** can set the shortlink
38 |
39 | ### Constraints and assumptions
40 |
41 | #### State assumptions
42 |
43 | * Traffic is not evenly distributed
44 | * Following a short link should be fast
45 | * Pastes are text only
46 | * Page view analytics do not need to be realtime
47 | * 10 million users
48 | * 10 million paste writes per month
49 | * 100 million paste reads per month
50 | * 10:1 read to write ratio
51 |
52 | #### Calculate usage
53 |
54 | **Clarify with your interviewer if you should run back-of-the-envelope usage calculations.**
55 |
56 | * Size per paste
57 | * 1 KB content per paste
58 | * `shortlink` - 7 bytes
59 | * `expiration_length_in_minutes` - 4 bytes
60 | * `created_at` - 5 bytes
61 | * `paste_path` - 255 bytes
62 | * total = ~1.27 KB
63 | * 12.7 GB of new paste content per month
64 | * 1.27 KB per paste * 10 million pastes per month
65 | * ~450 GB of new paste content in 3 years
66 | * 360 million shortlinks in 3 years
67 | * Assume most are new pastes instead of updates to existing ones
68 | * 4 paste writes per second on average
69 | * 40 read requests per second on average
70 |
71 | Handy conversion guide:
72 |
73 | * 2.5 million seconds per month
74 | * 1 request per second = 2.5 million requests per month
75 | * 40 requests per second = 100 million requests per month
76 | * 400 requests per second = 1 billion requests per month
77 |
78 | ## Step 2: Create a high level design
79 |
80 | > Outline a high level design with all important components.
81 |
82 | 
83 |
84 | ## Step 3: Design core components
85 |
86 | > Dive into details for each core component.
87 |
88 | ### Use case: User enters a block of text and gets a randomly generated link
89 |
90 | We could use a [relational database](https://github.com/donnemartin/system-design-primer#relational-database-management-system-rdbms) as a large hash table, mapping the generated url to a file server and path containing the paste file.
91 |
92 | Instead of managing a file server, we could use a managed **Object Store** such as Amazon S3 or a [NoSQL document store](https://github.com/donnemartin/system-design-primer#document-store).
93 |
94 | As an alternative to a relational database acting as a large hash table, we could use a [NoSQL key-value store](https://github.com/donnemartin/system-design-primer#key-value-store). We should discuss the [tradeoffs between choosing SQL or NoSQL](https://github.com/donnemartin/system-design-primer#sql-or-nosql). The following discussion uses the relational database approach.
95 |
96 | * The **Client** sends a create paste request to the **Web Server**, running as a [reverse proxy](https://github.com/donnemartin/system-design-primer#reverse-proxy-web-server)
97 | * The **Web Server** forwards the request to the **Write API** server
98 | * The **Write API** server does the following:
99 | * Generates a unique url
100 | * Checks if the url is unique by looking at the **SQL Database** for a duplicate
101 | * If the url is not unique, it generates another url
102 |         * If we supported a custom url, we could use the user-supplied url (also checking for a duplicate)
103 | * Saves to the **SQL Database** `pastes` table
104 | * Saves the paste data to the **Object Store**
105 | * Returns the url
106 |
107 | **Clarify with your interviewer how much code you are expected to write**.
108 |
109 | The `pastes` table could have the following structure:
110 |
111 | ```
112 | shortlink char(7) NOT NULL
113 | expiration_length_in_minutes int NOT NULL
114 | created_at datetime NOT NULL
115 | paste_path varchar(255) NOT NULL
116 | PRIMARY KEY(shortlink)
117 | ```
118 |
119 | We'll create an [index](https://github.com/donnemartin/system-design-primer#use-good-indices) on `shortlink` and `created_at` to speed up lookups (log-time instead of scanning the entire table) and to keep the data in memory. Reading 1 MB sequentially from memory takes about 250 microseconds, while reading from SSD takes 4x and from disk takes 80x longer.
120 |
121 | To generate the unique url, we could:
122 |
123 | * Take the [**MD5**](https://en.wikipedia.org/wiki/MD5) hash of the user's ip_address + timestamp
124 | * MD5 is a widely used hashing function that produces a 128-bit hash value
125 | * MD5 is uniformly distributed
126 | * Alternatively, we could also take the MD5 hash of randomly-generated data
127 | * [**Base 62**](https://www.kerstner.at/2012/07/shortening-strings-using-base-62-encoding/) encode the MD5 hash
128 | * Base 62 encodes to `[a-zA-Z0-9]` which works well for urls, eliminating the need for escaping special characters
129 |     * There is only one hash result for the original input and Base 62 is deterministic (no randomness involved)
130 |     * Base 64 is another popular encoding but poses issues for urls because of the additional `+` and `/` characters
131 |     * The following [Base 62 pseudocode](http://stackoverflow.com/questions/742013/how-to-code-a-url-shortener) runs in O(k) time where k is the number of digits (7 in our case):
132 |
133 | ```
134 | def base_encode(num, base=62):
135 |     digits = []
136 |     while num > 0:
137 |         remainder = num % base
138 |         digits.append(remainder)
139 |         num = num // base
140 |     return digits[::-1]
141 | ```
142 |
143 | * Take the first 7 characters of the output, which results in 62^7 possible values and should be sufficient to handle our constraint of 360 million shortlinks in 3 years:
144 |
145 | ```
146 | url = base_encode(md5(ip_address+timestamp))[:URL_LENGTH]
147 | ```
148 |
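    | Putting the pieces together, a runnable interpretation in Python (the `ALPHABET` constant and helper names are our own, not part of the solution code):
    |
    | ```
    | import hashlib
    | import time
    |
    | ALPHABET = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    | URL_LENGTH = 7
    |
    |
    | def base_encode(num, base=62):
    |     digits = []
    |     while num > 0:
    |         num, remainder = divmod(num, base)
    |         digits.append(ALPHABET[remainder])
    |     return ''.join(reversed(digits))
    |
    |
    | def generate_shortlink(ip_address):
    |     seed = '{0}{1}'.format(ip_address, time.time()).encode('utf-8')
    |     num = int(hashlib.md5(seed).hexdigest(), 16)
    |     return base_encode(num)[:URL_LENGTH]
    | ```
    |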
149 | We'll use a public [**REST API**](https://github.com/donnemartin/system-design-primer#representational-state-transfer-rest):
150 |
151 | ```
152 | $ curl -X POST --data '{ "expiration_length_in_minutes": "60", \
153 | "paste_contents": "Hello World!" }' https://pastebin.com/api/v1/paste
154 | ```
155 |
156 | Response:
157 |
158 | ```
159 | {
160 | "shortlink": "foobar"
161 | }
162 | ```
163 |
164 | For internal communications, we could use [Remote Procedure Calls](https://github.com/donnemartin/system-design-primer#remote-procedure-call-rpc).
165 |
166 | ### Use case: User enters a paste's url and views the contents
167 |
168 | * The **Client** sends a get paste request to the **Web Server**
169 | * The **Web Server** forwards the request to the **Read API** server
170 | * The **Read API** server does the following:
171 | * Checks the **SQL Database** for the generated url
172 | * If the url is in the **SQL Database**, fetch the paste contents from the **Object Store**
173 | * Else, return an error message for the user
174 |
175 | REST API:
176 |
177 | ```
178 | $ curl https://pastebin.com/api/v1/paste?shortlink=foobar
179 | ```
180 |
181 | Response:
182 |
183 | ```
184 | {
185 |     "paste_contents": "Hello World",
186 |     "created_at": "YYYY-MM-DD HH:MM:SS",
187 |     "expiration_length_in_minutes": "60"
188 | }
189 | ```
190 |
191 | ### Use case: Service tracks analytics of pages
192 |
193 | Since realtime analytics are not a requirement, we could simply **MapReduce** the **Web Server** logs to generate hit counts.
194 |
195 | **Clarify with your interviewer how much code you are expected to write**.
196 |
197 | ```
198 | from mrjob.job import MRJob
    |
    |
    | class HitCounts(MRJob):
199 |
200 | def extract_url(self, line):
201 | """Extract the generated url from the log line."""
202 | ...
203 |
204 | def extract_year_month(self, line):
205 | """Return the year and month portions of the timestamp."""
206 | ...
207 |
208 | def mapper(self, _, line):
209 | """Parse each log line, extract and transform relevant lines.
210 |
211 | Emit key value pairs of the form:
212 |
213 | (2016-01, url0), 1
214 | (2016-01, url0), 1
215 | (2016-01, url1), 1
216 | """
217 | url = self.extract_url(line)
218 | period = self.extract_year_month(line)
219 | yield (period, url), 1
220 |
221 |     def reducer(self, key, values):
222 | """Sum values for each key.
223 |
224 | (2016-01, url0), 2
225 | (2016-01, url1), 1
226 | """
227 | yield key, sum(values)
228 | ```
229 |
230 | ### Use case: Service deletes expired pastes
231 |
232 | To delete expired pastes, we could just scan the **SQL Database** for all entries whose expiration timestamp is older than the current timestamp. All expired entries would then be deleted (or marked as expired) from the table.
233 |
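    | A minimal sketch of the cleanup job in Python (SQLite syntax; assumes the `pastes` schema above, with a sentinel value of 0 in `expiration_length_in_minutes` encoding "never expires"):
    |
    | ```
    | import sqlite3
    |
    |
    | def delete_expired_pastes(conn):
    |     """Delete pastes whose expiration window has passed."""
    |     cur = conn.execute(
    |         "DELETE FROM pastes "
    |         "WHERE expiration_length_in_minutes > 0 "
    |         "AND datetime(created_at, '+' || expiration_length_in_minutes || ' minutes') "
    |         "<= datetime('now')")
    |     conn.commit()
    |     return cur.rowcount
    | ```
    |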
234 | ## Step 4: Scale the design
235 |
236 | > Identify and address bottlenecks, given the constraints.
237 |
238 | 
239 |
240 | **Important: Do not simply jump right into the final design from the initial design!**
241 |
242 | State you would do this iteratively: 1) **Benchmark/Load Test**, 2) **Profile** for bottlenecks, 3) address bottlenecks while evaluating alternatives and trade-offs, and 4) repeat. See [Design a system that scales to millions of users on AWS](https://github.com/donnemartin/system-design-primer/blob/master/solutions/system_design/scaling_aws/README.md) as a sample on how to iteratively scale the initial design.
243 |
244 | It's important to discuss what bottlenecks you might encounter with the initial design and how you might address each of them. For example, what issues are addressed by adding a **Load Balancer** with multiple **Web Servers**? **CDN**? **Master-Slave Replicas**? What are the alternatives and **Trade-Offs** for each?
245 |
246 | We'll introduce some components to complete the design and to address scalability issues. Internal load balancers are not shown to reduce clutter.
247 |
248 | *To avoid repeating discussions*, refer to the following [system design topics](https://github.com/donnemartin/system-design-primer#index-of-system-design-topics) for main talking points, tradeoffs, and alternatives:
249 |
250 | * [DNS](https://github.com/donnemartin/system-design-primer#domain-name-system)
251 | * [CDN](https://github.com/donnemartin/system-design-primer#content-delivery-network)
252 | * [Load balancer](https://github.com/donnemartin/system-design-primer#load-balancer)
253 | * [Horizontal scaling](https://github.com/donnemartin/system-design-primer#horizontal-scaling)
254 | * [Web server (reverse proxy)](https://github.com/donnemartin/system-design-primer#reverse-proxy-web-server)
255 | * [API server (application layer)](https://github.com/donnemartin/system-design-primer#application-layer)
256 | * [Cache](https://github.com/donnemartin/system-design-primer#cache)
257 | * [Relational database management system (RDBMS)](https://github.com/donnemartin/system-design-primer#relational-database-management-system-rdbms)
258 | * [SQL write master-slave failover](https://github.com/donnemartin/system-design-primer#fail-over)
259 | * [Master-slave replication](https://github.com/donnemartin/system-design-primer#master-slave-replication)
260 | * [Consistency patterns](https://github.com/donnemartin/system-design-primer#consistency-patterns)
261 | * [Availability patterns](https://github.com/donnemartin/system-design-primer#availability-patterns)
262 |
263 | The **Analytics Database** could use a data warehousing solution such as Amazon Redshift or Google BigQuery.
264 |
265 | An **Object Store** such as Amazon S3 can comfortably handle the constraint of 12.7 GB of new content per month.
266 |
267 | To address the 40 *average* read requests per second (higher at peak), traffic for popular content should be handled by the **Memory Cache** instead of the database. The **Memory Cache** is also useful for handling the unevenly distributed traffic and traffic spikes. The **SQL Read Replicas** should be able to handle the cache misses, as long as the replicas are not bogged down with replicating writes.
268 |
269 | 4 *average* paste writes per second (higher at peak) should be doable for a single **SQL Write Master-Slave**. Otherwise, we'll need to employ additional SQL scaling patterns:
270 |
271 | * [Federation](https://github.com/donnemartin/system-design-primer#federation)
272 | * [Sharding](https://github.com/donnemartin/system-design-primer#sharding)
273 | * [Denormalization](https://github.com/donnemartin/system-design-primer#denormalization)
274 | * [SQL Tuning](https://github.com/donnemartin/system-design-primer#sql-tuning)
275 |
276 | We should also consider moving some data to a **NoSQL Database**.
277 |
278 | ## Additional talking points
279 |
280 | > Additional topics to dive into, depending on the problem scope and time remaining.
281 |
282 | ### NoSQL
283 |
284 | * [Key-value store](https://github.com/donnemartin/system-design-primer#key-value-store)
285 | * [Document store](https://github.com/donnemartin/system-design-primer#document-store)
286 | * [Wide column store](https://github.com/donnemartin/system-design-primer#wide-column-store)
287 | * [Graph database](https://github.com/donnemartin/system-design-primer#graph-database)
288 | * [SQL vs NoSQL](https://github.com/donnemartin/system-design-primer#sql-or-nosql)
289 |
290 | ### Caching
291 |
292 | * Where to cache
293 | * [Client caching](https://github.com/donnemartin/system-design-primer#client-caching)
294 | * [CDN caching](https://github.com/donnemartin/system-design-primer#cdn-caching)
295 | * [Web server caching](https://github.com/donnemartin/system-design-primer#web-server-caching)
296 | * [Database caching](https://github.com/donnemartin/system-design-primer#database-caching)
297 | * [Application caching](https://github.com/donnemartin/system-design-primer#application-caching)
298 | * What to cache
299 | * [Caching at the database query level](https://github.com/donnemartin/system-design-primer#caching-at-the-database-query-level)
300 | * [Caching at the object level](https://github.com/donnemartin/system-design-primer#caching-at-the-object-level)
301 | * When to update the cache
302 | * [Cache-aside](https://github.com/donnemartin/system-design-primer#cache-aside)
303 | * [Write-through](https://github.com/donnemartin/system-design-primer#write-through)
304 | * [Write-behind (write-back)](https://github.com/donnemartin/system-design-primer#write-behind-write-back)
305 | * [Refresh ahead](https://github.com/donnemartin/system-design-primer#refresh-ahead)
306 |
307 | ### Asynchronism and microservices
308 |
309 | * [Message queues](https://github.com/donnemartin/system-design-primer#message-queues)
310 | * [Task queues](https://github.com/donnemartin/system-design-primer#task-queues)
311 | * [Back pressure](https://github.com/donnemartin/system-design-primer#back-pressure)
312 | * [Microservices](https://github.com/donnemartin/system-design-primer#microservices)
313 |
314 | ### Communications
315 |
316 | * Discuss tradeoffs:
317 | * External communication with clients - [HTTP APIs following REST](https://github.com/donnemartin/system-design-primer#representational-state-transfer-rest)
318 | * Internal communications - [RPC](https://github.com/donnemartin/system-design-primer#remote-procedure-call-rpc)
319 | * [Service discovery](https://github.com/donnemartin/system-design-primer#service-discovery)
320 |
321 | ### Security
322 |
323 | Refer to the [security section](https://github.com/donnemartin/system-design-primer#security).
324 |
325 | ### Latency numbers
326 |
327 | See [Latency numbers every programmer should know](https://github.com/donnemartin/system-design-primer#latency-numbers-every-programmer-should-know).
328 |
329 | ### Ongoing
330 |
331 | * Continue benchmarking and monitoring your system to address bottlenecks as they come up
332 | * Scaling is an iterative process
333 |
--------------------------------------------------------------------------------
/solutions/system_design/pastebin/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mutablealligator/system-design-primer/0889f04f0d8162b6513437d3171a8ebb9ef5e504/solutions/system_design/pastebin/__init__.py
--------------------------------------------------------------------------------
/solutions/system_design/pastebin/pastebin.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mutablealligator/system-design-primer/0889f04f0d8162b6513437d3171a8ebb9ef5e504/solutions/system_design/pastebin/pastebin.png
--------------------------------------------------------------------------------
/solutions/system_design/pastebin/pastebin.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from mrjob.job import MRJob
4 |
5 |
6 | class HitCounts(MRJob):
7 |
8 | def extract_url(self, line):
9 | """Extract the generated url from the log line."""
10 | pass
11 |
12 | def extract_year_month(self, line):
13 | """Return the year and month portions of the timestamp."""
14 | pass
15 |
16 | def mapper(self, _, line):
17 | """Parse each log line, extract and transform relevant lines.
18 |
19 | Emit key value pairs of the form:
20 |
21 | (2016-01, url0), 1
22 | (2016-01, url0), 1
23 | (2016-01, url1), 1
24 | """
25 | url = self.extract_url(line)
26 | period = self.extract_year_month(line)
27 | yield (period, url), 1
28 |
29 |     def reducer(self, key, values):
30 | """Sum values for each key.
31 |
32 | (2016-01, url0), 2
33 | (2016-01, url1), 1
34 | """
35 | yield key, sum(values)
36 |
37 | def steps(self):
38 | """Run the map and reduce steps."""
39 | return [
40 | self.mr(mapper=self.mapper,
41 | reducer=self.reducer)
42 | ]
43 |
44 |
45 | if __name__ == '__main__':
46 | HitCounts.run()
47 |
--------------------------------------------------------------------------------
/solutions/system_design/pastebin/pastebin_basic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mutablealligator/system-design-primer/0889f04f0d8162b6513437d3171a8ebb9ef5e504/solutions/system_design/pastebin/pastebin_basic.png
--------------------------------------------------------------------------------
/solutions/system_design/query_cache/README.md:
--------------------------------------------------------------------------------
1 | # Design a key-value cache to save the results of the most recent web server queries
2 |
3 | *Note: This document links directly to relevant areas found in the [system design topics](https://github.com/donnemartin/system-design-primer#index-of-system-design-topics) to avoid duplication. Refer to the linked content for general talking points, tradeoffs, and alternatives.*
4 |
5 | ## Step 1: Outline use cases and constraints
6 |
7 | > Gather requirements and scope the problem.
8 | > Ask questions to clarify use cases and constraints.
9 | > Discuss assumptions.
10 |
11 | Without an interviewer to address clarifying questions, we'll define some use cases and constraints.
12 |
13 | ### Use cases
14 |
15 | #### We'll scope the problem to handle only the following use cases
16 |
17 | * **User** sends a search request resulting in a cache hit
18 | * **User** sends a search request resulting in a cache miss
19 | * **Service** has high availability
20 |
21 | ### Constraints and assumptions
22 |
23 | #### State assumptions
24 |
25 | * Traffic is not evenly distributed
26 | * Popular queries should almost always be in the cache
27 | * Need to determine how to expire/refresh
28 | * Serving from cache requires fast lookups
29 | * Low latency between machines
30 | * Limited memory in cache
31 | * Need to determine what to keep/remove
32 | * Need to cache millions of queries
33 | * 10 million users
34 | * 10 billion queries per month
35 |
36 | #### Calculate usage
37 |
38 | **Clarify with your interviewer if you should run back-of-the-envelope usage calculations.**
39 |
40 | * Cache stores ordered list of key: query, value: results
41 | * `query` - 50 bytes
42 | * `title` - 20 bytes
43 | * `snippet` - 200 bytes
44 | * Total: 270 bytes
45 | * 2.7 TB of cache data per month if all 10 billion queries are unique and all are stored
46 | * 270 bytes per search * 10 billion searches per month
47 | * Assumptions state limited memory, need to determine how to expire contents
48 | * 4,000 requests per second
49 |
50 | Handy conversion guide:
51 |
52 | * 2.5 million seconds per month
53 | * 1 request per second = 2.5 million requests per month
54 | * 40 requests per second = 100 million requests per month
55 | * 400 requests per second = 1 billion requests per month
56 |
57 | ## Step 2: Create a high level design
58 |
59 | > Outline a high level design with all important components.
60 |
61 | 
62 |
63 | ## Step 3: Design core components
64 |
65 | > Dive into details for each core component.
66 |
67 | ### Use case: User sends a request resulting in a cache hit
68 |
69 | Popular queries can be served from a **Memory Cache** such as Redis or Memcached to reduce read latency and to avoid overloading the **Reverse Index Service** and **Document Service**. Reading 1 MB sequentially from memory takes about 250 microseconds, while reading from SSD takes 4x and from disk takes 80x longer.
70 |
71 | Since the cache has limited capacity, we'll use a least recently used (LRU) approach to expire older entries.
72 |
73 | * The **Client** sends a request to the **Web Server**, running as a [reverse proxy](https://github.com/donnemartin/system-design-primer#reverse-proxy-web-server)
74 | * The **Web Server** forwards the request to the **Query API** server
75 | * The **Query API** server does the following:
76 | * Parses the query
77 | * Removes markup
78 | * Breaks up the text into terms
79 | * Fixes typos
80 | * Normalizes capitalization
81 | * Converts the query to use boolean operations
82 | * Checks the **Memory Cache** for the content matching the query
83 | * If there's a hit in the **Memory Cache**, the **Memory Cache** does the following:
84 | * Updates the cached entry's position to the front of the LRU list
85 | * Returns the cached contents
86 | * Else, the **Query API** does the following:
87 | * Uses the **Reverse Index Service** to find documents matching the query
88 | * The **Reverse Index Service** ranks the matching results and returns the top ones
89 | * Uses the **Document Service** to return titles and snippets
90 | * Updates the **Memory Cache** with the contents, placing the entry at the front of the LRU list
91 |
92 | #### Cache implementation
93 |
94 | The cache can use a doubly-linked list: new items will be added to the head while items to expire will be removed from the tail. We'll use a hash table for fast lookup of each linked list node.
95 |
96 | **Clarify with your interviewer how much code you are expected to write**.
97 |
98 | **Query API Server** implementation:
99 |
100 | ```
101 | class QueryApi(object):
102 |
103 | def __init__(self, memory_cache, reverse_index_service):
104 | self.memory_cache = memory_cache
105 | self.reverse_index_service = reverse_index_service
106 |
107 | def parse_query(self, query):
108 | """Remove markup, break text into terms, deal with typos,
109 | normalize capitalization, convert to use boolean operations.
110 | """
111 | ...
112 |
113 | def process_query(self, query):
114 | query = self.parse_query(query)
115 | results = self.memory_cache.get(query)
116 | if results is None:
117 | results = self.reverse_index_service.process_search(query)
118 | self.memory_cache.set(query, results)
119 | return results
120 | ```
121 |
122 | **Node** implementation:
123 |
124 | ```
125 | class Node(object):
126 |
127 | def __init__(self, query, results):
128 | self.query = query
129 | self.results = results
130 | ```
131 |
132 | **LinkedList** implementation:
133 |
134 | ```
135 | class LinkedList(object):
136 |
137 | def __init__(self):
138 | self.head = None
139 | self.tail = None
140 |
141 | def move_to_front(self, node):
142 | ...
143 |
144 | def append_to_front(self, node):
145 | ...
146 |
147 | def remove_from_tail(self):
148 | ...
149 | ```
150 |
151 | **Cache** implementation:
152 |
153 | ```
154 | class Cache(object):
155 |
156 | def __init__(self, MAX_SIZE):
157 | self.MAX_SIZE = MAX_SIZE
158 | self.size = 0
159 | self.lookup = {} # key: query, value: node
160 | self.linked_list = LinkedList()
161 |
162 |     def get(self, query):
163 | """Get the stored query result from the cache.
164 |
165 | Accessing a node updates its position to the front of the LRU list.
166 | """
167 |         node = self.lookup.get(query)
168 | if node is None:
169 | return None
170 | self.linked_list.move_to_front(node)
171 | return node.results
172 |
173 |     def set(self, query, results):
174 | """Set the result for the given query key in the cache.
175 |
176 | When updating an entry, updates its position to the front of the LRU list.
177 | If the entry is new and the cache is at capacity, removes the oldest entry
178 | before the new entry is added.
179 | """
180 |         node = self.lookup.get(query)
181 | if node is not None:
182 | # Key exists in cache, update the value
183 | node.results = results
184 | self.linked_list.move_to_front(node)
185 | else:
186 | # Key does not exist in cache
187 | if self.size == self.MAX_SIZE:
188 | # Remove the oldest entry from the linked list and lookup
189 | self.lookup.pop(self.linked_list.tail.query, None)
190 | self.linked_list.remove_from_tail()
191 | else:
192 | self.size += 1
193 | # Add the new key and value
194 | new_node = Node(query, results)
195 | self.linked_list.append_to_front(new_node)
196 | self.lookup[query] = new_node
197 | ```
198 |
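    | For reference, Python's `collections.OrderedDict` can express the same LRU semantics in a few lines (a compact stand-in, not the linked-list implementation an interviewer may want to see built by hand):
    |
    | ```
    | from collections import OrderedDict
    |
    |
    | class LruCache(object):
    |
    |     def __init__(self, max_size):
    |         self.max_size = max_size
    |         self.cache = OrderedDict()
    |
    |     def get(self, query):
    |         if query not in self.cache:
    |             return None
    |         self.cache.move_to_end(query)        # Mark as most recently used
    |         return self.cache[query]
    |
    |     def set(self, query, results):
    |         if query in self.cache:
    |             self.cache.move_to_end(query)
    |         elif len(self.cache) >= self.max_size:
    |             self.cache.popitem(last=False)   # Evict the least recently used entry
    |         self.cache[query] = results
    | ```
    |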
199 | #### When to update the cache
200 |
201 | The cache should be updated when:
202 |
203 | * The page contents change
204 | * The page is removed or a new page is added
205 | * The page rank changes
206 |
207 | The most straightforward way to handle these cases is to simply set a max time that a cached entry can stay in the cache before it is updated, usually referred to as time to live (TTL).
208 |
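    | A minimal sketch of TTL bookkeeping (the class and attribute names are illustrative):
    |
    | ```
    | import time
    |
    |
    | class CachedResult(object):
    |     """Pair cached results with an expiry time."""
    |
    |     def __init__(self, results, ttl_seconds):
    |         self.results = results
    |         self.expires_at = time.time() + ttl_seconds
    |
    |     def is_fresh(self):
    |         return time.time() < self.expires_at
    | ```
    |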
209 | Refer to [When to update the cache](https://github.com/donnemartin/system-design-primer#when-to-update-the-cache) for tradeoffs and alternatives. The approach above describes [cache-aside](https://github.com/donnemartin/system-design-primer#cache-aside).
210 |
211 | ## Step 4: Scale the design
212 |
213 | > Identify and address bottlenecks, given the constraints.
214 |
215 | 
216 |
217 | **Important: Do not simply jump right into the final design from the initial design!**
218 |
219 | State you would do this iteratively: 1) **Benchmark/Load Test**, 2) **Profile** for bottlenecks, 3) address bottlenecks while evaluating alternatives and trade-offs, and 4) repeat. See [Design a system that scales to millions of users on AWS](https://github.com/donnemartin/system-design-primer/blob/master/solutions/system_design/scaling_aws/README.md) as a sample on how to iteratively scale the initial design.
220 |
221 | It's important to discuss what bottlenecks you might encounter with the initial design and how you might address each of them. For example, what issues are addressed by adding a **Load Balancer** with multiple **Web Servers**? **CDN**? **Master-Slave Replicas**? What are the alternatives and **Trade-Offs** for each?
222 |
223 | We'll introduce some components to complete the design and to address scalability issues. Internal load balancers are not shown to reduce clutter.
224 |
225 | *To avoid repeating discussions*, refer to the following [system design topics](https://github.com/donnemartin/system-design-primer#index-of-system-design-topics) for main talking points, tradeoffs, and alternatives:
226 |
227 | * [DNS](https://github.com/donnemartin/system-design-primer#domain-name-system)
228 | * [Load balancer](https://github.com/donnemartin/system-design-primer#load-balancer)
229 | * [Horizontal scaling](https://github.com/donnemartin/system-design-primer#horizontal-scaling)
230 | * [Web server (reverse proxy)](https://github.com/donnemartin/system-design-primer#reverse-proxy-web-server)
231 | * [API server (application layer)](https://github.com/donnemartin/system-design-primer#application-layer)
232 | * [Cache](https://github.com/donnemartin/system-design-primer#cache)
233 | * [Consistency patterns](https://github.com/donnemartin/system-design-primer#consistency-patterns)
234 | * [Availability patterns](https://github.com/donnemartin/system-design-primer#availability-patterns)
235 |
236 | ### Expanding the Memory Cache to many machines
237 |
238 | To handle the heavy request load and the large amount of memory needed, we'll scale horizontally. We have three main options on how to store the data on our **Memory Cache** cluster:
239 |
240 | * **Each machine in the cache cluster has its own cache** - Simple, although it will likely result in a low cache hit rate.
241 | * **Each machine in the cache cluster has a copy of the cache** - Simple, although it is an inefficient use of memory.
242 | * **The cache is [sharded](https://github.com/donnemartin/system-design-primer#sharding) across all machines in the cache cluster** - More complex, although it is likely the best option. We could use hashing to determine which machine could have the cached results of a query using `machine = hash(query)`. We'll likely want to use [consistent hashing](https://github.com/donnemartin/system-design-primer#under-development); a simple modulo version is sketched after this list.
243 |
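    | A minimal sketch of the simple (non-consistent) version (`NUM_MACHINES` is an illustrative cluster size). Note that modulo sharding remaps almost every key whenever the cluster grows or shrinks, which is the main argument for consistent hashing:
    |
    | ```
    | import hashlib
    |
    | NUM_MACHINES = 32  # Illustrative cluster size
    |
    |
    | def machine_for(query):
    |     """Pick the cache machine for a query with a stable hash and modulo sharding."""
    |     digest = hashlib.md5(query.encode('utf-8')).hexdigest()
    |     return int(digest, 16) % NUM_MACHINES
    | ```
    |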
244 | ## Additional talking points
245 |
246 | > Additional topics to dive into, depending on the problem scope and time remaining.
247 |
248 | ### SQL scaling patterns
249 |
250 | * [Read replicas](https://github.com/donnemartin/system-design-primer#master-slave)
251 | * [Federation](https://github.com/donnemartin/system-design-primer#federation)
252 | * [Sharding](https://github.com/donnemartin/system-design-primer#sharding)
253 | * [Denormalization](https://github.com/donnemartin/system-design-primer#denormalization)
254 | * [SQL Tuning](https://github.com/donnemartin/system-design-primer#sql-tuning)
255 |
256 | ### NoSQL
257 |
258 | * [Key-value store](https://github.com/donnemartin/system-design-primer#key-value-store)
259 | * [Document store](https://github.com/donnemartin/system-design-primer#document-store)
260 | * [Wide column store](https://github.com/donnemartin/system-design-primer#wide-column-store)
261 | * [Graph database](https://github.com/donnemartin/system-design-primer#graph-database)
262 | * [SQL vs NoSQL](https://github.com/donnemartin/system-design-primer#sql-or-nosql)
263 |
264 | ### Caching
265 |
266 | * Where to cache
267 | * [Client caching](https://github.com/donnemartin/system-design-primer#client-caching)
268 | * [CDN caching](https://github.com/donnemartin/system-design-primer#cdn-caching)
269 | * [Web server caching](https://github.com/donnemartin/system-design-primer#web-server-caching)
270 | * [Database caching](https://github.com/donnemartin/system-design-primer#database-caching)
271 | * [Application caching](https://github.com/donnemartin/system-design-primer#application-caching)
272 | * What to cache
273 | * [Caching at the database query level](https://github.com/donnemartin/system-design-primer#caching-at-the-database-query-level)
274 | * [Caching at the object level](https://github.com/donnemartin/system-design-primer#caching-at-the-object-level)
275 | * When to update the cache
276 | * [Cache-aside](https://github.com/donnemartin/system-design-primer#cache-aside)
277 | * [Write-through](https://github.com/donnemartin/system-design-primer#write-through)
278 | * [Write-behind (write-back)](https://github.com/donnemartin/system-design-primer#write-behind-write-back)
279 | * [Refresh ahead](https://github.com/donnemartin/system-design-primer#refresh-ahead)
280 |
281 | ### Asynchronism and microservices
282 |
283 | * [Message queues](https://github.com/donnemartin/system-design-primer#message-queues)
284 | * [Task queues](https://github.com/donnemartin/system-design-primer#task-queues)
285 | * [Back pressure](https://github.com/donnemartin/system-design-primer#back-pressure)
286 | * [Microservices](https://github.com/donnemartin/system-design-primer#microservices)
287 |
288 | ### Communications
289 |
290 | * Discuss tradeoffs:
291 | * External communication with clients - [HTTP APIs following REST](https://github.com/donnemartin/system-design-primer#representational-state-transfer-rest)
292 | * Internal communications - [RPC](https://github.com/donnemartin/system-design-primer#remote-procedure-call-rpc)
293 | * [Service discovery](https://github.com/donnemartin/system-design-primer#service-discovery)
294 |
295 | ### Security
296 |
297 | Refer to the [security section](https://github.com/donnemartin/system-design-primer#security).
298 |
299 | ### Latency numbers
300 |
301 | See [Latency numbers every programmer should know](https://github.com/donnemartin/system-design-primer#latency-numbers-every-programmer-should-know).
302 |
303 | ### Ongoing
304 |
305 | * Continue benchmarking and monitoring your system to address bottlenecks as they come up
306 | * Scaling is an iterative process
307 |
--------------------------------------------------------------------------------
/solutions/system_design/query_cache/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mutablealligator/system-design-primer/0889f04f0d8162b6513437d3171a8ebb9ef5e504/solutions/system_design/query_cache/__init__.py
--------------------------------------------------------------------------------
/solutions/system_design/query_cache/query_cache.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mutablealligator/system-design-primer/0889f04f0d8162b6513437d3171a8ebb9ef5e504/solutions/system_design/query_cache/query_cache.png
--------------------------------------------------------------------------------
/solutions/system_design/query_cache/query_cache_basic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mutablealligator/system-design-primer/0889f04f0d8162b6513437d3171a8ebb9ef5e504/solutions/system_design/query_cache/query_cache_basic.png
--------------------------------------------------------------------------------
/solutions/system_design/query_cache/query_cache_snippets.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | class QueryApi(object):
4 |
5 | def __init__(self, memory_cache, reverse_index_cluster):
6 | self.memory_cache = memory_cache
7 | self.reverse_index_cluster = reverse_index_cluster
8 |
9 | def parse_query(self, query):
10 | """Remove markup, break text into terms, deal with typos,
11 | normalize capitalization, convert to use boolean operations.
12 | """
13 | ...
14 |
15 | def process_query(self, query):
16 | query = self.parse_query(query)
17 | results = self.memory_cache.get(query)
18 | if results is None:
19 | results = self.reverse_index_cluster.process_search(query)
20 | self.memory_cache.set(query, results)
21 | return results
22 |
23 |
24 | class Node(object):
25 |
26 | def __init__(self, query, results):
27 | self.query = query
28 | self.results = results
29 |
30 |
31 | class LinkedList(object):
32 |
33 | def __init__(self):
34 | self.head = None
35 | self.tail = None
36 |
37 | def move_to_front(self, node):
38 | ...
39 |
40 | def append_to_front(self, node):
41 | ...
42 |
43 | def remove_from_tail(self):
44 | ...
45 |
46 |
47 | class Cache(object):
48 |
49 | def __init__(self, MAX_SIZE):
50 | self.MAX_SIZE = MAX_SIZE
51 | self.size = 0
52 | self.lookup = {}
53 | self.linked_list = LinkedList()
54 |
55 |     def get(self, query):
56 | """Get the stored query result from the cache.
57 |
58 | Accessing a node updates its position to the front of the LRU list.
59 | """
60 |         node = self.lookup.get(query)  # avoid a KeyError on a cache miss
61 | if node is None:
62 | return None
63 | self.linked_list.move_to_front(node)
64 | return node.results
65 |
66 |     def set(self, query, results):
67 | """Set the result for the given query key in the cache.
68 |
69 | When updating an entry, updates its position to the front of the LRU list.
70 | If the entry is new and the cache is at capacity, removes the oldest entry
71 | before the new entry is added.
72 | """
73 |         node = self.lookup.get(query)
74 | if node is not None:
75 | # Key exists in cache, update the value
76 | node.results = results
77 | self.linked_list.move_to_front(node)
78 | else:
79 | # Key does not exist in cache
80 | if self.size == self.MAX_SIZE:
81 | # Remove the oldest entry from the linked list and lookup
82 | self.lookup.pop(self.linked_list.tail.query, None)
83 | self.linked_list.remove_from_tail()
84 | else:
85 | self.size += 1
86 | # Add the new key and value
87 | new_node = Node(query, results)
88 | self.linked_list.append_to_front(new_node)
89 | self.lookup[query] = new_node
90 |
--------------------------------------------------------------------------------
/solutions/system_design/sales_rank/README.md:
--------------------------------------------------------------------------------
1 | # Design Amazon's sales rank by category feature
2 |
3 | *Note: This document links directly to relevant areas found in the [system design topics](https://github.com/donnemartin/system-design-primer#index-of-system-design-topics) to avoid duplication. Refer to the linked content for general talking points, tradeoffs, and alternatives.*
4 |
5 | ## Step 1: Outline use cases and constraints
6 |
7 | > Gather requirements and scope the problem.
8 | > Ask questions to clarify use cases and constraints.
9 | > Discuss assumptions.
10 |
11 | Without an interviewer to address clarifying questions, we'll define some use cases and constraints.
12 |
13 | ### Use cases
14 |
15 | #### We'll scope the problem to handle only the following use case
16 |
17 | * **Service** calculates the past week's most popular products by category
18 | * **User** views the past week's most popular products by category
19 | * **Service** has high availability
20 |
21 | #### Out of scope
22 |
23 | * The general e-commerce site
24 | * Design components only for calculating sales rank
25 |
26 | ### Constraints and assumptions
27 |
28 | #### State assumptions
29 |
30 | * Traffic is not evenly distributed
31 | * Items can be in multiple categories
32 | * Items cannot change categories
33 | * There are no subcategories, e.g. no nesting such as `foo/bar/baz`
34 | * Results must be updated hourly
35 | * More popular products might need to be updated more frequently
36 | * 10 million products
37 | * 1000 categories
38 | * 1 billion transactions per month
39 | * 100 billion read requests per month
40 | * 100:1 read to write ratio
41 |
42 | #### Calculate usage
43 |
44 | **Clarify with your interviewer if you should run back-of-the-envelope usage calculations.**
45 |
46 | * Size per transaction:
47 | * `created_at` - 5 bytes
48 | * `product_id` - 8 bytes
49 | * `category_id` - 4 bytes
50 | * `seller_id` - 8 bytes
51 | * `buyer_id` - 8 bytes
52 | * `quantity` - 4 bytes
53 | * `total_price` - 5 bytes
54 | * Total: ~40 bytes
55 | * 40 GB of new transaction content per month
56 | * 40 bytes per transaction * 1 billion transactions per month
57 | * 1.44 TB of new transaction content in 3 years
58 | * Assume most are new transactions instead of updates to existing ones
59 | * 400 transactions per second on average
60 | * 40,000 read requests per second on average
61 |
62 | Handy conversion guide:
63 |
64 | * 2.5 million seconds per month
65 | * 1 request per second = 2.5 million requests per month
66 | * 40 requests per second = 100 million requests per month
67 | * 400 requests per second = 1 billion requests per month
68 |
69 | ## Step 2: Create a high level design
70 |
71 | > Outline a high level design with all important components.
72 |
73 | 
74 |
75 | ## Step 3: Design core components
76 |
77 | > Dive into details for each core component.
78 |
79 | ### Use case: Service calculates the past week's most popular products by category
80 |
81 | We could store the raw **Sales API** server log files on a managed **Object Store** such as Amazon S3, rather than managing our own distributed file system.
82 |
83 | **Clarify with your interviewer how much code you are expected to write**.
84 |
85 | We'll assume this is a sample log entry, tab delimited:
86 |
87 | ```
88 | timestamp product_id category_id qty total_price seller_id buyer_id
89 | t1 product1 category1 2 20.00 1 1
90 | t2 product1 category2 2 20.00 2 2
91 | t2 product1 category2 1 10.00 2 3
92 | t3 product2 category1 3 7.00 3 4
93 | t4 product3 category2 7 2.00 4 5
94 | t5 product4 category1 1 5.00 5 6
95 | ...
96 | ```
97 |
98 | The **Sales Rank Service** could use **MapReduce**, using the **Sales API** server log files as input and writing the results to an aggregate table `sales_rank` in a **SQL Database**. We should discuss the [use cases and tradeoffs between choosing SQL or NoSQL](https://github.com/donnemartin/system-design-primer#sql-or-nosql).
99 |
100 | We'll use a multi-step **MapReduce**:
101 |
102 | * **Step 1** - Transform the data to `(category, product_id), sum(quantity)`
103 | * **Step 2** - Perform a distributed sort
104 |
105 | ```
106 | class SalesRanker(MRJob):
107 |
108 | def within_past_week(self, timestamp):
109 | """Return True if timestamp is within past week, False otherwise."""
110 | ...
111 |
112 |     def mapper(self, _, line):
113 | """Parse each log line, extract and transform relevant lines.
114 |
115 | Emit key value pairs of the form:
116 |
117 | (category1, product1), 2
118 | (category2, product1), 2
119 | (category2, product1), 1
120 | (category1, product2), 3
121 | (category2, product3), 7
122 | (category1, product4), 1
123 | """
124 | timestamp, product_id, category_id, quantity, total_price, seller_id, \
125 | buyer_id = line.split('\t')
126 | if self.within_past_week(timestamp):
127 |             yield (category_id, product_id), int(quantity)
128 |
129 |     def reducer(self, key, values):
130 | """Sum values for each key.
131 |
132 | (category1, product1), 2
133 | (category2, product1), 3
134 | (category1, product2), 3
135 | (category2, product3), 7
136 | (category1, product4), 1
137 | """
138 | yield key, sum(values)
139 |
140 | def mapper_sort(self, key, value):
141 | """Construct key to ensure proper sorting.
142 |
143 | Transform key and value to the form:
144 |
145 | (category1, 2), product1
146 | (category2, 3), product1
147 | (category1, 3), product2
148 | (category2, 7), product3
149 | (category1, 1), product4
150 |
151 | The shuffle/sort step of MapReduce will then do a
152 | distributed sort on the keys, resulting in:
153 |
154 | (category1, 1), product4
155 | (category1, 2), product1
156 | (category1, 3), product2
157 | (category2, 3), product1
158 | (category2, 7), product3
159 | """
160 | category_id, product_id = key
161 | quantity = value
162 | yield (category_id, quantity), product_id
163 |
164 | def reducer_identity(self, key, value):
165 | yield key, value
166 |
167 | def steps(self):
168 | """Run the map and reduce steps."""
169 | return [
170 | self.mr(mapper=self.mapper,
171 | reducer=self.reducer),
172 | self.mr(mapper=self.mapper_sort,
173 | reducer=self.reducer_identity),
174 | ]
175 | ```
176 |
177 | The result would be the following sorted list, which we could insert into the `sales_rank` table:
178 |
179 | ```
180 | (category1, 1), product4
181 | (category1, 2), product1
182 | (category1, 3), product2
183 | (category2, 3), product1
184 | (category2, 7), product3
185 | ```
186 |
187 | The `sales_rank` table could have the following structure:
188 |
189 | ```
190 | id int NOT NULL AUTO_INCREMENT
191 | category_id int NOT NULL
192 | total_sold int NOT NULL
193 | product_id int NOT NULL
194 | PRIMARY KEY(id)
195 | FOREIGN KEY(category_id) REFERENCES Categories(id)
196 | FOREIGN KEY(product_id) REFERENCES Products(id)
197 | ```
198 |
199 | We'll create an [index](https://github.com/donnemartin/system-design-primer#use-good-indices) on `id`, `category_id`, and `product_id` to speed up lookups (log-time instead of scanning the entire table) and to keep the data in memory. Reading 1 MB sequentially from memory takes about 250 microseconds, while reading from SSD takes 4x and from disk takes 80x longer.
200 |
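As an illustration, assuming the schema above and a hypothetical category id of 1234, the **Read API** could fetch a category's most popular products with a query along these lines:

```
SELECT product_id, total_sold
FROM sales_rank
WHERE category_id = 1234
ORDER BY total_sold DESC
LIMIT 10;
```

A composite index on `(category_id, total_sold)` could serve this query without a separate sort step.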
201 | ### Use case: User views the past week's most popular products by category
202 |
203 | * The **Client** sends a request to the **Web Server**, running as a [reverse proxy](https://github.com/donnemartin/system-design-primer#reverse-proxy-web-server)
204 | * The **Web Server** forwards the request to the **Read API** server
205 | * The **Read API** server reads from the **SQL Database** `sales_rank` table
206 |
207 | We'll use a public [**REST API**](https://github.com/donnemartin/system-design-primer#representational-state-transfer-rest):
208 |
209 | ```
210 | $ curl https://amazon.com/api/v1/popular?category_id=1234
211 | ```
212 |
213 | Response:
214 |
215 | ```
216 | {
217 | "id": "100",
218 | "category_id": "1234",
219 | "total_sold": "100000",
220 | "product_id": "50",
221 | },
222 | {
223 | "id": "53",
224 | "category_id": "1234",
225 | "total_sold": "90000",
226 | "product_id": "200",
227 | },
228 | {
229 | "id": "75",
230 | "category_id": "1234",
231 | "total_sold": "80000",
232 | "product_id": "3",
233 | },
234 | ```
235 |
236 | For internal communications, we could use [Remote Procedure Calls](https://github.com/donnemartin/system-design-primer#remote-procedure-call-rpc).
237 |
238 | ## Step 4: Scale the design
239 |
240 | > Identify and address bottlenecks, given the constraints.
241 |
242 | 
243 |
244 | **Important: Do not simply jump right into the final design from the initial design!**
245 |
246 | State you would: 1) **Benchmark/Load Test**, 2) **Profile** for bottlenecks, 3) address bottlenecks while evaluating alternatives and trade-offs, and 4) repeat. See [Design a system that scales to millions of users on AWS](https://github.com/donnemartin/system-design-primer/blob/master/solutions/system_design/scaling_aws/README.md) as an example of how to iteratively scale the initial design.
247 |
248 | It's important to discuss what bottlenecks you might encounter with the initial design and how you might address each of them. For example, what issues are addressed by adding a **Load Balancer** with multiple **Web Servers**? **CDN**? **Master-Slave Replicas**? What are the alternatives and **Trade-Offs** for each?
249 |
250 | We'll introduce some components to complete the design and to address scalability issues. Internal load balancers are not shown to reduce clutter.
251 |
252 | *To avoid repeating discussions*, refer to the following [system design topics](https://github.com/donnemartin/system-design-primer#index-of-system-design-topics) for main talking points, tradeoffs, and alternatives:
253 |
254 | * [DNS](https://github.com/donnemartin/system-design-primer#domain-name-system)
255 | * [CDN](https://github.com/donnemartin/system-design-primer#content-delivery-network)
256 | * [Load balancer](https://github.com/donnemartin/system-design-primer#load-balancer)
257 | * [Horizontal scaling](https://github.com/donnemartin/system-design-primer#horizontal-scaling)
258 | * [Web server (reverse proxy)](https://github.com/donnemartin/system-design-primer#reverse-proxy-web-server)
259 | * [API server (application layer)](https://github.com/donnemartin/system-design-primer#application-layer)
260 | * [Cache](https://github.com/donnemartin/system-design-primer#cache)
261 | * [Relational database management system (RDBMS)](https://github.com/donnemartin/system-design-primer#relational-database-management-system-rdbms)
262 | * [SQL write master-slave failover](https://github.com/donnemartin/system-design-primer#fail-over)
263 | * [Master-slave replication](https://github.com/donnemartin/system-design-primer#master-slave-replication)
264 | * [Consistency patterns](https://github.com/donnemartin/system-design-primer#consistency-patterns)
265 | * [Availability patterns](https://github.com/donnemartin/system-design-primer#availability-patterns)
266 |
267 | The **Analytics Database** could use a data warehousing solution such as Amazon Redshift or Google BigQuery.
268 |
269 | We might only want to store a limited time period of data in the database, while storing the rest in a data warehouse or in an **Object Store**. An **Object Store** such as Amazon S3 can comfortably handle the constraint of 40 GB of new content per month.
270 |
271 | To address the 40,000 *average* read requests per second (higher at peak), traffic for popular content (and their sales rank) should be handled by the **Memory Cache** instead of the database. The **Memory Cache** is also useful for handling the unevenly distributed traffic and traffic spikes. With the large volume of reads, the **SQL Read Replicas** might not be able to handle the cache misses. We'll probably need to employ additional SQL scaling patterns.
272 |
273 | 400 *average* writes per second (higher at peak) might be tough for a single **SQL Write Master-Slave**, also pointing to a need for additional scaling techniques.
274 |
275 | SQL scaling patterns include:
276 |
277 | * [Federation](https://github.com/donnemartin/system-design-primer#federation)
278 | * [Sharding](https://github.com/donnemartin/system-design-primer#sharding)
279 | * [Denormalization](https://github.com/donnemartin/system-design-primer#denormalization)
280 | * [SQL Tuning](https://github.com/donnemartin/system-design-primer#sql-tuning)
281 |
282 | We should also consider moving some data to a **NoSQL Database**.
283 |
284 | ## Additional talking points
285 |
286 | > Additional topics to dive into, depending on the problem scope and time remaining.
287 |
288 | #### NoSQL
289 |
290 | * [Key-value store](https://github.com/donnemartin/system-design-primer#key-value-store)
291 | * [Document store](https://github.com/donnemartin/system-design-primer#document-store)
292 | * [Wide column store](https://github.com/donnemartin/system-design-primer#wide-column-store)
293 | * [Graph database](https://github.com/donnemartin/system-design-primer#graph-database)
294 | * [SQL vs NoSQL](https://github.com/donnemartin/system-design-primer#sql-or-nosql)
295 |
296 | ### Caching
297 |
298 | * Where to cache
299 | * [Client caching](https://github.com/donnemartin/system-design-primer#client-caching)
300 | * [CDN caching](https://github.com/donnemartin/system-design-primer#cdn-caching)
301 | * [Web server caching](https://github.com/donnemartin/system-design-primer#web-server-caching)
302 | * [Database caching](https://github.com/donnemartin/system-design-primer#database-caching)
303 | * [Application caching](https://github.com/donnemartin/system-design-primer#application-caching)
304 | * What to cache
305 | * [Caching at the database query level](https://github.com/donnemartin/system-design-primer#caching-at-the-database-query-level)
306 | * [Caching at the object level](https://github.com/donnemartin/system-design-primer#caching-at-the-object-level)
307 | * When to update the cache
308 | * [Cache-aside](https://github.com/donnemartin/system-design-primer#cache-aside)
309 | * [Write-through](https://github.com/donnemartin/system-design-primer#write-through)
310 | * [Write-behind (write-back)](https://github.com/donnemartin/system-design-primer#write-behind-write-back)
311 | * [Refresh ahead](https://github.com/donnemartin/system-design-primer#refresh-ahead)
312 |
313 | ### Asynchronism and microservices
314 |
315 | * [Message queues](https://github.com/donnemartin/system-design-primer#message-queues)
316 | * [Task queues](https://github.com/donnemartin/system-design-primer#task-queues)
317 | * [Back pressure](https://github.com/donnemartin/system-design-primer#back-pressure)
318 | * [Microservices](https://github.com/donnemartin/system-design-primer#microservices)
319 |
320 | ### Communications
321 |
322 | * Discuss tradeoffs:
323 | * External communication with clients - [HTTP APIs following REST](https://github.com/donnemartin/system-design-primer#representational-state-transfer-rest)
324 | * Internal communications - [RPC](https://github.com/donnemartin/system-design-primer#remote-procedure-call-rpc)
325 | * [Service discovery](https://github.com/donnemartin/system-design-primer#service-discovery)
326 |
327 | ### Security
328 |
329 | Refer to the [security section](https://github.com/donnemartin/system-design-primer#security).
330 |
331 | ### Latency numbers
332 |
333 | See [Latency numbers every programmer should know](https://github.com/donnemartin/system-design-primer#latency-numbers-every-programmer-should-know).
334 |
335 | ### Ongoing
336 |
337 | * Continue benchmarking and monitoring your system to address bottlenecks as they come up
338 | * Scaling is an iterative process
339 |
--------------------------------------------------------------------------------
/solutions/system_design/sales_rank/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mutablealligator/system-design-primer/0889f04f0d8162b6513437d3171a8ebb9ef5e504/solutions/system_design/sales_rank/__init__.py
--------------------------------------------------------------------------------
/solutions/system_design/sales_rank/sales_rank.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mutablealligator/system-design-primer/0889f04f0d8162b6513437d3171a8ebb9ef5e504/solutions/system_design/sales_rank/sales_rank.png
--------------------------------------------------------------------------------
/solutions/system_design/sales_rank/sales_rank_basic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mutablealligator/system-design-primer/0889f04f0d8162b6513437d3171a8ebb9ef5e504/solutions/system_design/sales_rank/sales_rank_basic.png
--------------------------------------------------------------------------------
/solutions/system_design/sales_rank/sales_rank_mapreduce.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from mrjob.job import MRJob
4 |
5 |
6 | class SalesRanker(MRJob):
7 |
8 | def within_past_week(self, timestamp):
9 | """Return True if timestamp is within past week, False otherwise."""
10 | ...
11 |
12 |     def mapper(self, _, line):
13 | """Parse each log line, extract and transform relevant lines.
14 |
15 | Emit key value pairs of the form:
16 |
17 | (foo, p1), 2
18 | (bar, p1), 2
19 | (bar, p1), 1
20 | (foo, p2), 3
21 | (bar, p3), 10
22 | (foo, p4), 1
23 | """
24 | timestamp, product_id, category, quantity = line.split('\t')
25 | if self.within_past_week(timestamp):
26 |             yield (category, product_id), int(quantity)
27 |
28 |     def reducer(self, key, values):
29 | """Sum values for each key.
30 |
31 | (foo, p1), 2
32 | (bar, p1), 3
33 | (foo, p2), 3
34 | (bar, p3), 10
35 | (foo, p4), 1
36 | """
37 | yield key, sum(values)
38 |
39 | def mapper_sort(self, key, value):
40 | """Construct key to ensure proper sorting.
41 |
42 | Transform key and value to the form:
43 |
44 | (foo, 2), p1
45 | (bar, 3), p1
46 | (foo, 3), p2
47 | (bar, 10), p3
48 | (foo, 1), p4
49 |
50 | The shuffle/sort step of MapReduce will then do a
51 | distributed sort on the keys, resulting in:
52 |
53 |         (bar, 3), p1
54 |         (bar, 10), p3
55 |         (foo, 1), p4
56 |         (foo, 2), p1
57 |         (foo, 3), p2
58 | """
59 | category, product_id = key
60 | quantity = value
61 | yield (category, quantity), product_id
62 |
63 | def reducer_identity(self, key, value):
64 | yield key, value
65 |
66 | def steps(self):
67 | """Run the map and reduce steps."""
68 | return [
69 | self.mr(mapper=self.mapper,
70 | reducer=self.reducer),
71 | self.mr(mapper=self.mapper_sort,
72 | reducer=self.reducer_identity),
73 | ]
74 |
75 |
76 | if __name__ == '__main__':
77 |     SalesRanker.run()
78 |
--------------------------------------------------------------------------------
/solutions/system_design/scaling_aws/README.md:
--------------------------------------------------------------------------------
1 | # Design a system that scales to millions of users on AWS
2 |
3 | *Note: This document links directly to relevant areas found in the [system design topics](https://github.com/donnemartin/system-design-primer#index-of-system-design-topics) to avoid duplication. Refer to the linked content for general talking points, tradeoffs, and alternatives.*
4 |
5 | ## Step 1: Outline use cases and constraints
6 |
7 | > Gather requirements and scope the problem.
8 | > Ask questions to clarify use cases and constraints.
9 | > Discuss assumptions.
10 |
11 | Without an interviewer to address clarifying questions, we'll define some use cases and constraints.
12 |
13 | ### Use cases
14 |
15 | Solving this problem takes an iterative approach: 1) **Benchmark/Load Test**, 2) **Profile** for bottlenecks, 3) address bottlenecks while evaluating alternatives and trade-offs, and 4) repeat. This is a good pattern for evolving basic designs into scalable designs.
16 |
17 | Unless you have a background in AWS or are applying for a position that requires AWS knowledge, AWS-specific details are not a requirement. However, **many of the principles discussed in this exercise apply more generally outside of the AWS ecosystem.**
18 |
19 | #### We'll scope the problem to handle only the following use cases
20 |
21 | * **User** makes a read or write request
22 | * **Service** does processing, stores user data, then returns the results
23 | * **Service** needs to evolve from serving a small number of users to millions of users
24 | * Discuss general scaling patterns as we evolve an architecture to handle a large number of users and requests
25 | * **Service** has high availability
26 |
27 | ### Constraints and assumptions
28 |
29 | #### State assumptions
30 |
31 | * Traffic is not evenly distributed
32 | * Need for relational data
33 | * Scale from 1 user to tens of millions of users
34 | * Denote increase of users as:
35 | * Users+
36 | * Users++
37 | * Users+++
38 | * ...
39 | * 10 million users
40 | * 1 billion writes per month
41 | * 100 billion reads per month
42 | * 100:1 read to write ratio
43 | * 1 KB content per write
44 |
45 | #### Calculate usage
46 |
47 | **Clarify with your interviewer if you should run back-of-the-envelope usage calculations.**
48 |
49 | * 1 TB of new content per month
50 | * 1 KB per write * 1 billion writes per month
51 | * 36 TB of new content in 3 years
52 | * Assume most writes are from new content instead of updates to existing ones
53 | * 400 writes per second on average
54 | * 40,000 reads per second on average
55 |
56 | Handy conversion guide:
57 |
58 | * 2.5 million seconds per month
59 | * 1 request per second = 2.5 million requests per month
60 | * 40 requests per second = 100 million requests per month
61 | * 400 requests per second = 1 billion requests per month
62 |
63 | ## Step 2: Create a high level design
64 |
65 | > Outline a high level design with all important components.
66 |
67 | 
68 |
69 | ## Step 3: Design core components
70 |
71 | > Dive into details for each core component.
72 |
73 | ### Use case: User makes a read or write request
74 |
75 | #### Goals
76 |
77 | * With only 1-2 users, you only need a basic setup
78 | * Single box for simplicity
79 | * Vertical scaling when needed
80 | * Monitor to determine bottlenecks
81 |
82 | #### Start with a single box
83 |
84 | * **Web server** on EC2
85 | * Storage for user data
86 | * [**MySQL Database**](https://github.com/donnemartin/system-design-primer#sql)
87 |
88 | Use **Vertical Scaling**:
89 |
90 | * Simply choose a bigger box
91 | * Keep an eye on metrics to determine how to scale up
92 | * Use basic monitoring to determine bottlenecks: CPU, memory, IO, network, etc
93 | * CloudWatch, top, nagios, statsd, graphite, etc
94 | * Scaling vertically can get very expensive
95 | * No redundancy/failover
96 |
97 | *Trade-offs, alternatives, and additional details:*
98 |
99 | * The alternative to **Vertical Scaling** is [**Horizontal scaling**](https://github.com/donnemartin/system-design-primer#horizontal-scaling)
100 |
101 | #### Start with SQL, consider NoSQL
102 |
103 | The constraints assume there is a need for relational data. We can start off using a **MySQL Database** on the single box.
104 |
105 | *Trade-offs, alternatives, and additional details:*
106 |
107 | * See the [Relational database management system (RDBMS)](https://github.com/donnemartin/system-design-primer#relational-database-management-system-rdbms) section
108 | * Discuss reasons to use [SQL or NoSQL](https://github.com/donnemartin/system-design-primer#sql-or-nosql)
109 |
110 | #### Assign a public static IP
111 |
112 | * Elastic IPs provide a public endpoint whose IP doesn't change on reboot
113 | * Helps with failover, just point the domain to a new IP
114 |
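As a small sketch, assuming boto3 and a hypothetical instance id, allocating an Elastic IP and attaching it to the web server might look like:

```
import boto3

ec2 = boto3.client('ec2')

# Allocate an Elastic IP in the VPC, then attach it to the web server instance
allocation = ec2.allocate_address(Domain='vpc')
ec2.associate_address(InstanceId='i-0123456789abcdef0',  # hypothetical instance id
                      AllocationId=allocation['AllocationId'])
```

On failover, re-running `associate_address` against a replacement instance points the same public IP at the new box.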
115 | #### Use a DNS
116 |
117 | Add a **DNS** such as Route 53 to map the domain to the instance's public IP.
118 |
119 | *Trade-offs, alternatives, and additional details:*
120 |
121 | * See the [Domain name system](https://github.com/donnemartin/system-design-primer#domain-name-system) section
122 |
123 | #### Secure the web server
124 |
125 | * Open up only necessary ports
126 | * Allow the web server to respond to incoming requests from:
127 | * 80 for HTTP
128 | * 443 for HTTPS
129 | * 22 for SSH to only whitelisted IPs
130 | * Prevent the web server from initiating outbound connections
131 |
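A sketch of these rules with boto3 (the security group id and the whitelisted CIDR are hypothetical):

```
import boto3

ec2 = boto3.client('ec2')

ec2.authorize_security_group_ingress(
    GroupId='sg-0123456789abcdef0',  # hypothetical security group id
    IpPermissions=[
        # HTTP and HTTPS open to the world
        {'IpProtocol': 'tcp', 'FromPort': 80, 'ToPort': 80,
         'IpRanges': [{'CidrIp': '0.0.0.0/0'}]},
        {'IpProtocol': 'tcp', 'FromPort': 443, 'ToPort': 443,
         'IpRanges': [{'CidrIp': '0.0.0.0/0'}]},
        # SSH restricted to a whitelisted IP
        {'IpProtocol': 'tcp', 'FromPort': 22, 'ToPort': 22,
         'IpRanges': [{'CidrIp': '203.0.113.10/32'}]},
    ])
```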
132 | *Trade-offs, alternatives, and additional details:*
133 |
134 | * See the [Security](https://github.com/donnemartin/system-design-primer#security) section
135 |
136 | ## Step 4: Scale the design
137 |
138 | > Identify and address bottlenecks, given the constraints.
139 |
140 | ### Users+
141 |
142 | 
143 |
144 | #### Assumptions
145 |
146 | Our user count is starting to pick up and the load is increasing on our single box. Our **Benchmarks/Load Tests** and **Profiling** are pointing to the **MySQL Database** taking up more and more memory and CPU resources, while the user content is filling up disk space.
147 |
148 | We've been able to address these issues with **Vertical Scaling** so far. Unfortunately, this has become quite expensive and it doesn't allow for independent scaling of the **MySQL Database** and **Web Server**.
149 |
150 | #### Goals
151 |
152 | * Lighten load on the single box and allow for independent scaling
153 | * Store static content separately in an **Object Store**
154 | * Move the **MySQL Database** to a separate box
155 | * Disadvantages
156 | * These changes would increase complexity and would require changes to the **Web Server** to point to the **Object Store** and the **MySQL Database**
157 | * Additional security measures must be taken to secure the new components
158 | * AWS costs could also increase, but should be weighed with the costs of managing similar systems on your own
159 |
160 | #### Store static content separately
161 |
162 | * Consider using a managed **Object Store** like S3 to store static content
163 | * Highly scalable and reliable
164 | * Server side encryption
165 | * Move static content to S3
166 | * User files
167 | * JS
168 | * CSS
169 | * Images
170 | * Videos
171 |
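For example, assuming boto3 and hypothetical bucket and file names, uploading a static asset:

```
import boto3

s3 = boto3.client('s3')

# Upload a static asset; the web server then serves the S3/CDN URL for it
s3.upload_file('static/css/site.css',     # local path (hypothetical)
               'example-static-content',  # bucket name (hypothetical)
               'css/site.css')
```

The web server (or CDN) then serves the S3 URL instead of reading the file from local disk.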
172 | #### Move the MySQL database to a separate box
173 |
174 | * Consider using a service like RDS to manage the **MySQL Database**
175 | * Simple to administer, scale
176 | * Multiple availability zones
177 | * Encryption at rest
178 |
179 | #### Secure the system
180 |
181 | * Encrypt data in transit and at rest
182 | * Use a Virtual Private Cloud
183 | * Create a public subnet for the single **Web Server** so it can send and receive traffic from the internet
184 | * Create a private subnet for everything else, preventing outside access
185 | * Only open ports from whitelisted IPs for each component
186 | * These same patterns should be implemented for new components in the remainder of the exercise
187 |
188 | *Trade-offs, alternatives, and additional details:*
189 |
190 | * See the [Security](https://github.com/donnemartin/system-design-primer#security) section
191 |
192 | ### Users++
193 |
194 | 
195 |
196 | #### Assumptions
197 |
198 | Our **Benchmarks/Load Tests** and **Profiling** show that our single **Web Server** bottlenecks during peak hours, resulting in slow responses and in some cases, downtime. As the service matures, we'd also like to move towards higher availability and redundancy.
199 |
200 | #### Goals
201 |
202 | * The following goals attempt to address the scaling issues with the **Web Server**
203 | * Based on the **Benchmarks/Load Tests** and **Profiling**, you might only need to implement one or two of these techniques
204 | * Use [**Horizontal Scaling**](https://github.com/donnemartin/system-design-primer#horizontal-scaling) to handle increasing loads and to address single points of failure
205 | * Add a [**Load Balancer**](https://github.com/donnemartin/system-design-primer#load-balancer) such as Amazon's ELB or HAProxy
206 | * ELB is highly available
207 | * If you are configuring your own **Load Balancer**, setting up multiple servers in [active-active](https://github.com/donnemartin/system-design-primer#active-active) or [active-passive](https://github.com/donnemartin/system-design-primer#active-passive) in multiple availability zones will improve availability
208 | * Terminate SSL on the **Load Balancer** to reduce computational load on backend servers and to simplify certificate administration (a minimal HAProxy sketch follows this list)
209 | * Use multiple **Web Servers** spread out over multiple availability zones
210 | * Use multiple **MySQL** instances in [**Master-Slave Failover**](https://github.com/donnemartin/system-design-primer#master-slave-replication) mode across multiple availability zones to improve redundancy
211 | * Separate out the **Web Servers** from the [**Application Servers**](https://github.com/donnemartin/system-design-primer#application-layer)
212 | * Scale and configure both layers independently
213 | * **Web Servers** can run as a [**Reverse Proxy**](https://github.com/donnemartin/system-design-primer#reverse-proxy-web-server)
214 | * For example, you can add **Application Servers** handling **Read APIs** while others handle **Write APIs**
215 | * Move static (and some dynamic) content to a [**Content Delivery Network (CDN)**](https://github.com/donnemartin/system-design-primer#content-delivery-network) such as CloudFront to reduce load and latency
216 |
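If configuring your own **Load Balancer**, a minimal HAProxy config sketch (certificate path, server names, and IPs are hypothetical) that terminates SSL and round robins across **Web Servers** in two availability zones:

```
frontend https-in
    bind *:443 ssl crt /etc/ssl/private/site.pem
    default_backend web_servers

backend web_servers
    balance roundrobin
    server web1 10.0.1.10:80 check   # availability zone 1
    server web2 10.0.2.10:80 check   # availability zone 2
```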
217 | *Trade-offs, alternatives, and additional details:*
218 |
219 | * See the linked content above for details
220 |
221 | ### Users+++
222 |
223 | 
224 |
225 | **Note:** **Internal Load Balancers** not shown to reduce clutter
226 |
227 | #### Assumptions
228 |
229 | Our **Benchmarks/Load Tests** and **Profiling** show that we are read-heavy (100:1 with writes) and our database is suffering from poor performance from the high read requests.
230 |
231 | #### Goals
232 |
233 | * The following goals attempt to address the scaling issues with the **MySQL Database**
234 | * Based on the **Benchmarks/Load Tests** and **Profiling**, you might only need to implement one or two of these techniques
235 | * Move the following data to a [**Memory Cache**](https://github.com/donnemartin/system-design-primer#cache) such as Elasticache to reduce load and latency:
236 | * Frequently accessed content from **MySQL**
237 | * First, try to configure the **MySQL Database** cache to see if that is sufficient to relieve the bottleneck before implementing a **Memory Cache**
238 | * Session data from the **Web Servers**
239 | * The **Web Servers** become stateless, allowing for **Autoscaling**
240 | * Reading 1 MB sequentially from memory takes about 250 microseconds, while reading from SSD takes 4x and from disk takes 80x longer.
241 | * Add [**MySQL Read Replicas**](https://github.com/donnemartin/system-design-primer#master-slave-replication) to reduce load on the write master
242 | * Add more **Web Servers** and **Application Servers** to improve responsiveness
243 |
244 | *Trade-offs, alternatives, and additional details:*
245 |
246 | * See the linked content above for details
247 |
248 | #### Add MySQL read replicas
249 |
250 | * In addition to adding and scaling a **Memory Cache**, **MySQL Read Replicas** can also help relieve load on the **MySQL Write Master**
251 | * Add logic to **Web Server** to separate out writes and reads
252 | * Add **Load Balancers** in front of **MySQL Read Replicas** (not pictured to reduce clutter)
253 | * Most services are read-heavy vs write-heavy
254 |
255 | *Trade-offs, alternatives, and additional details:*
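A minimal sketch of the read/write split, assuming hypothetical connection objects from a MySQL driver:

```
import random


class RoutingConnection(object):
    """Send writes to the master and reads to a randomly chosen replica."""

    def __init__(self, master, replicas):
        self.master = master      # connection to the write master (hypothetical)
        self.replicas = replicas  # connections to the read replicas (hypothetical)

    def execute(self, statement, params=None):
        if statement.lstrip().upper().startswith('SELECT'):
            connection = random.choice(self.replicas)  # reads tolerate replication lag
        else:
            connection = self.master  # writes always go to the master
        return connection.execute(statement, params)
```

Note the trade-off: with asynchronous replication, a read issued right after a write may hit a replica that hasn't caught up yet.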
256 |
257 | * See the [Relational database management system (RDBMS)](https://github.com/donnemartin/system-design-primer#relational-database-management-system-rdbms) section
258 |
259 | ### Users++++
260 |
261 | 
262 |
263 | #### Assumptions
264 |
306 | Our **Benchmarks/Load Tests** and **Profiling** show that our traffic spikes during regular business hours in the U.S. and drops significantly when users leave the office. We think we can cut costs by automatically spinning up and down servers based on actual load. We're a small shop, so we'd like to automate as much of the DevOps as possible for **Autoscaling** and for general operations.
266 |
267 | #### Goals
268 |
269 | * Add **Autoscaling** to provision capacity as needed
270 | * Keep up with traffic spikes
271 | * Reduce costs by powering down unused instances
272 | * Automate DevOps
273 | * Chef, Puppet, Ansible, etc
274 | * Continue monitoring metrics to address bottlenecks
275 | * **Host level** - Review a single EC2 instance
276 | * **Aggregate level** - Review load balancer stats
277 | * **Log analysis** - CloudWatch, CloudTrail, Loggly, Splunk, Sumo
278 | * **External site performance** - Pingdom or New Relic
279 | * **Handle notifications and incidents** - PagerDuty
280 | * **Error Reporting** - Sentry
281 |
282 | #### Add autoscaling
283 |
284 | * Consider a managed service such as AWS **Autoscaling**
285 | * Create one group for each **Web Server** and one for each **Application Server** type, place each group in multiple availability zones
286 | * Set a min and max number of instances
287 | * Trigger to scale up and down through CloudWatch
288 | * Simple time of day metric for predictable loads or
289 | * Metrics over a time period:
290 | * CPU load
291 | * Latency
292 | * Network traffic
293 | * Custom metric
294 | * Disadvantages
295 | * Autoscaling can introduce complexity
296 | * It could take some time before a system appropriately scales up to meet increased demand, or to scale down when demand drops
297 |
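A sketch with boto3 (the group and policy names are hypothetical) of a target tracking policy that scales on average CPU:

```
import boto3

autoscaling = boto3.client('autoscaling')

# Keep average CPU near 50%: scale out when above, scale in when below
autoscaling.put_scaling_policy(
    AutoScalingGroupName='web-server-group',  # hypothetical group name
    PolicyName='keep-cpu-near-50-percent',
    PolicyType='TargetTrackingScaling',
    TargetTrackingConfiguration={
        'PredefinedMetricSpecification': {
            'PredefinedMetricType': 'ASGAverageCPUUtilization',
        },
        'TargetValue': 50.0,
    })
```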
298 | ### Users+++++
299 |
300 | 
301 |
302 | **Note:** **Autoscaling** groups not shown to reduce clutter
303 |
304 | #### Assumptions
305 |
306 | As the service continues to grow towards the figures outlined in the constraints, we iteratively run **Benchmarks/Load Tests** and **Profiling** to uncover and address new bottlenecks.
307 |
308 | #### Goals
309 |
310 | We'll continue to address scaling issues due to the problem's constraints:
311 |
312 | * If our **MySQL Database** starts to grow too large, we might consider storing only a limited time period of data in the database, while storing the rest in a data warehouse such as Redshift
313 | * A data warehouse such as Redshift can comfortably handle the constraint of 1 TB of new content per month
314 | * With 40,000 average read requests per second, read traffic for popular content can be addressed by scaling the **Memory Cache**, which is also useful for handling the unevenly distributed traffic and traffic spikes
315 | * The **SQL Read Replicas** might have trouble handling the cache misses; we'll probably need to employ additional SQL scaling patterns
316 | * 400 average writes per second (with presumably significantly higher peaks) might be tough for a single **SQL Write Master-Slave**, also pointing to a need for additional scaling techniques
317 |
318 | SQL scaling patterns include:
319 |
320 | * [Federation](https://github.com/donnemartin/system-design-primer#federation)
321 | * [Sharding](https://github.com/donnemartin/system-design-primer#sharding)
322 | * [Denormalization](https://github.com/donnemartin/system-design-primer#denormalization)
323 | * [SQL Tuning](https://github.com/donnemartin/system-design-primer#sql-tuning)
324 |
325 | To further address the high read and write requests, we should also consider moving appropriate data to a [**NoSQL Database**](https://github.com/donnemartin/system-design-primer#nosql) such as DynamoDB.
326 |
327 | We can further separate out our [**Application Servers**](https://github.com/donnemartin/system-design-primer#application-layer) to allow for independent scaling. Batch processes or computations that do not need to be done in real-time can be done [**Asynchronously**](https://github.com/donnemartin/system-design-primer#asynchronism) with **Queues** and **Workers**:
328 |
329 | * For example, in a photo service, the photo upload and the thumbnail creation can be separated:
330 | * **Client** uploads photo
331 | * **Application Server** puts a job in a **Queue** such as SQS
332 | * The **Worker Service** on EC2 or Lambda pulls work off the **Queue** then:
333 | * Creates a thumbnail
334 | * Updates a **Database**
335 | * Stores the thumbnail in the **Object Store**
336 |
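A minimal sketch of the worker loop, assuming boto3 and SQS (the queue URL and the `...` helpers are hypothetical):

```
import boto3

sqs = boto3.client('sqs')
QUEUE_URL = 'https://sqs.us-east-1.amazonaws.com/123456789012/thumbnail-jobs'  # hypothetical


def create_thumbnail(photo_key):
    ...


def update_database(photo_key):
    ...


def store_thumbnail(photo_key):
    ...


def run_worker():
    while True:
        # Long poll for up to 10 jobs at a time
        response = sqs.receive_message(QueueUrl=QUEUE_URL,
                                       MaxNumberOfMessages=10,
                                       WaitTimeSeconds=20)
        for message in response.get('Messages', []):
            photo_key = message['Body']
            create_thumbnail(photo_key)
            update_database(photo_key)
            store_thumbnail(photo_key)
            # Delete only after successful processing so failed jobs are retried
            sqs.delete_message(QueueUrl=QUEUE_URL,
                               ReceiptHandle=message['ReceiptHandle'])
```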
337 | *Trade-offs, alternatives, and additional details:*
338 |
339 | * See the linked content above for details
340 |
341 | ## Additional talking points
342 |
343 | > Additional topics to dive into, depending on the problem scope and time remaining.
344 |
345 | ### SQL scaling patterns
346 |
347 | * [Read replicas](https://github.com/donnemartin/system-design-primer#master-slave)
348 | * [Federation](https://github.com/donnemartin/system-design-primer#federation)
349 | * [Sharding](https://github.com/donnemartin/system-design-primer#sharding)
350 | * [Denormalization](https://github.com/donnemartin/system-design-primer#denormalization)
351 | * [SQL Tuning](https://github.com/donnemartin/system-design-primer#sql-tuning)
352 |
353 | #### NoSQL
354 |
355 | * [Key-value store](https://github.com/donnemartin/system-design-primer#key-value-store)
356 | * [Document store](https://github.com/donnemartin/system-design-primer#document-store)
357 | * [Wide column store](https://github.com/donnemartin/system-design-primer#wide-column-store)
358 | * [Graph database](https://github.com/donnemartin/system-design-primer#graph-database)
359 | * [SQL vs NoSQL](https://github.com/donnemartin/system-design-primer#sql-or-nosql)
360 |
361 | ### Caching
362 |
363 | * Where to cache
364 | * [Client caching](https://github.com/donnemartin/system-design-primer#client-caching)
365 | * [CDN caching](https://github.com/donnemartin/system-design-primer#cdn-caching)
366 | * [Web server caching](https://github.com/donnemartin/system-design-primer#web-server-caching)
367 | * [Database caching](https://github.com/donnemartin/system-design-primer#database-caching)
368 | * [Application caching](https://github.com/donnemartin/system-design-primer#application-caching)
369 | * What to cache
370 | * [Caching at the database query level](https://github.com/donnemartin/system-design-primer#caching-at-the-database-query-level)
371 | * [Caching at the object level](https://github.com/donnemartin/system-design-primer#caching-at-the-object-level)
372 | * When to update the cache
373 | * [Cache-aside](https://github.com/donnemartin/system-design-primer#cache-aside)
374 | * [Write-through](https://github.com/donnemartin/system-design-primer#write-through)
375 | * [Write-behind (write-back)](https://github.com/donnemartin/system-design-primer#write-behind-write-back)
376 | * [Refresh ahead](https://github.com/donnemartin/system-design-primer#refresh-ahead)
377 |
378 | ### Asynchronism and microservices
379 |
380 | * [Message queues](https://github.com/donnemartin/system-design-primer#message-queues)
381 | * [Task queues](https://github.com/donnemartin/system-design-primer#task-queues)
382 | * [Back pressure](https://github.com/donnemartin/system-design-primer#back-pressure)
383 | * [Microservices](https://github.com/donnemartin/system-design-primer#microservices)
384 |
385 | ### Communications
386 |
387 | * Discuss tradeoffs:
388 | * External communication with clients - [HTTP APIs following REST](https://github.com/donnemartin/system-design-primer#representational-state-transfer-rest)
389 | * Internal communications - [RPC](https://github.com/donnemartin/system-design-primer#remote-procedure-call-rpc)
390 | * [Service discovery](https://github.com/donnemartin/system-design-primer#service-discovery)
391 |
392 | ### Security
393 |
394 | Refer to the [security section](https://github.com/donnemartin/system-design-primer#security).
395 |
396 | ### Latency numbers
397 |
398 | See [Latency numbers every programmer should know](https://github.com/donnemartin/system-design-primer#latency-numbers-every-programmer-should-know).
399 |
400 | ### Ongoing
401 |
402 | * Continue benchmarking and monitoring your system to address bottlenecks as they come up
403 | * Scaling is an iterative process
404 |
--------------------------------------------------------------------------------
/solutions/system_design/scaling_aws/scaling_aws.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mutablealligator/system-design-primer/0889f04f0d8162b6513437d3171a8ebb9ef5e504/solutions/system_design/scaling_aws/scaling_aws.png
--------------------------------------------------------------------------------
/solutions/system_design/scaling_aws/scaling_aws_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mutablealligator/system-design-primer/0889f04f0d8162b6513437d3171a8ebb9ef5e504/solutions/system_design/scaling_aws/scaling_aws_1.png
--------------------------------------------------------------------------------
/solutions/system_design/scaling_aws/scaling_aws_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mutablealligator/system-design-primer/0889f04f0d8162b6513437d3171a8ebb9ef5e504/solutions/system_design/scaling_aws/scaling_aws_2.png
--------------------------------------------------------------------------------
/solutions/system_design/scaling_aws/scaling_aws_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mutablealligator/system-design-primer/0889f04f0d8162b6513437d3171a8ebb9ef5e504/solutions/system_design/scaling_aws/scaling_aws_3.png
--------------------------------------------------------------------------------
/solutions/system_design/scaling_aws/scaling_aws_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mutablealligator/system-design-primer/0889f04f0d8162b6513437d3171a8ebb9ef5e504/solutions/system_design/scaling_aws/scaling_aws_4.png
--------------------------------------------------------------------------------
/solutions/system_design/scaling_aws/scaling_aws_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mutablealligator/system-design-primer/0889f04f0d8162b6513437d3171a8ebb9ef5e504/solutions/system_design/scaling_aws/scaling_aws_5.png
--------------------------------------------------------------------------------
/solutions/system_design/scaling_aws/scaling_aws_6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mutablealligator/system-design-primer/0889f04f0d8162b6513437d3171a8ebb9ef5e504/solutions/system_design/scaling_aws/scaling_aws_6.png
--------------------------------------------------------------------------------
/solutions/system_design/scaling_aws/scaling_aws_7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mutablealligator/system-design-primer/0889f04f0d8162b6513437d3171a8ebb9ef5e504/solutions/system_design/scaling_aws/scaling_aws_7.png
--------------------------------------------------------------------------------
/solutions/system_design/social_graph/README.md:
--------------------------------------------------------------------------------
1 | # Design the data structures for a social network
2 |
3 | *Note: This document links directly to relevant areas found in the [system design topics](https://github.com/donnemartin/system-design-primer#index-of-system-design-topics) to avoid duplication. Refer to the linked content for general talking points, tradeoffs, and alternatives.*
4 |
5 | ## Step 1: Outline use cases and constraints
6 |
7 | > Gather requirements and scope the problem.
8 | > Ask questions to clarify use cases and constraints.
9 | > Discuss assumptions.
10 |
11 | Without an interviewer to address clarifying questions, we'll define some use cases and constraints.
12 |
13 | ### Use cases
14 |
15 | #### We'll scope the problem to handle only the following use cases
16 |
17 | * **User** searches for someone and sees the shortest path to the searched person
18 | * **Service** has high availability
19 |
20 | ### Constraints and assumptions
21 |
22 | #### State assumptions
23 |
24 | * Traffic is not evenly distributed
25 | * Some searches are more popular than others, while others are only executed once
26 | * Graph data won't fit on a single machine
27 | * Graph edges are unweighted
28 | * 100 million users
29 | * 50 friends per user average
30 | * 1 billion friend searches per month
31 |
32 | Exercise the use of more traditional systems - don't use graph-specific solutions such as [GraphQL](http://graphql.org/) or a graph database like [Neo4j](https://neo4j.com/)
33 |
34 | #### Calculate usage
35 |
36 | **Clarify with your interviewer if you should run back-of-the-envelope usage calculations.**
37 |
38 | * 5 billion friend relationships
39 | * 100 million users * 50 friends per user average
40 | * 400 search requests per second
41 |
42 | Handy conversion guide:
43 |
44 | * 2.5 million seconds per month
45 | * 1 request per second = 2.5 million requests per month
46 | * 40 requests per second = 100 million requests per month
47 | * 400 requests per second = 1 billion requests per month
48 |
49 | ## Step 2: Create a high level design
50 |
51 | > Outline a high level design with all important components.
52 |
53 | 
54 |
55 | ## Step 3: Design core components
56 |
57 | > Dive into details for each core component.
58 |
59 | ### Use case: User searches for someone and sees the shortest path to the searched person
60 |
61 | **Clarify with your interviewer how much code you are expected to write**.
62 |
63 | Without the constraint of millions of users (vertices) and billions of friend relationships (edges), we could solve this unweighted shortest path task with a general BFS approach:
64 |
65 | ```
66 | class Graph(Graph):  # extends a base Graph class (nodes, adjacency, State enum) defined elsewhere
67 |
68 | def shortest_path(self, source, dest):
69 | if source is None or dest is None:
70 | return None
71 | if source is dest:
72 | return [source.key]
73 | prev_node_keys = self._shortest_path(source, dest)
74 | if prev_node_keys is None:
75 | return None
76 | else:
77 | path_ids = [dest.key]
78 | prev_node_key = prev_node_keys[dest.key]
79 | while prev_node_key is not None:
80 | path_ids.append(prev_node_key)
81 | prev_node_key = prev_node_keys[prev_node_key]
82 | return path_ids[::-1]
83 |
84 | def _shortest_path(self, source, dest):
85 | queue = deque()
86 | queue.append(source)
87 | prev_node_keys = {source.key: None}
88 | source.visit_state = State.visited
89 | while queue:
90 | node = queue.popleft()
91 | if node is dest:
92 | return prev_node_keys
93 | prev_node = node
94 | for adj_node in node.adj_nodes.values():
95 | if adj_node.visit_state == State.unvisited:
96 | queue.append(adj_node)
97 | prev_node_keys[adj_node.key] = prev_node.key
98 | adj_node.visit_state = State.visited
99 | return None
100 | ```
101 |
102 | Since we won't be able to fit all users on the same machine, we'll need to [shard](https://github.com/donnemartin/system-design-primer#sharding) users across **Person Servers** and access them with a **Lookup Service**.
103 |
104 | * The **Client** sends a request to the **Web Server**, running as a [reverse proxy](https://github.com/donnemartin/system-design-primer#reverse-proxy-web-server)
105 | * The **Web Server** forwards the request to the **Search API** server
106 | * The **Search API** server forwards the request to the **User Graph Service**
107 | * The **User Graph Service** does the following:
108 | * Uses the **Lookup Service** to find the **Person Server** where the current user's info is stored
109 | * Finds the appropriate **Person Server** to retrieve the current user's list of `friend_ids`
110 | * Runs a BFS search using the current user as the `source` and the current user's `friend_ids` as the ids for each `adjacent_node`
111 | * To get the `adjacent_node` from a given id:
112 | * The **User Graph Service** will *again* need to communicate with the **Lookup Service** to determine which **Person Server** stores the `adjacent_node` matching the given id (potential for optimization)
113 |
114 | **Clarify with your interviewer how much code you should be writing**.
115 |
116 | **Note**: Error handling is excluded below for simplicity. Ask if you should code proper error handling.
117 |
118 | **Lookup Service** implementation:
119 |
120 | ```
121 | class LookupService(object):
122 |
123 | def __init__(self):
124 | self.lookup = self._init_lookup() # key: person_id, value: person_server
125 |
126 | def _init_lookup(self):
127 | ...
128 |
129 | def lookup_person_server(self, person_id):
130 | return self.lookup[person_id]
131 | ```
132 |
133 | **Person Server** implementation:
134 |
135 | ```
136 | class PersonServer(object):
137 |
138 | def __init__(self):
139 |         self.people_map = {}  # key: person_id, value: person (renamed so it doesn't clash with the people() method)
140 |
141 | def add_person(self, person):
142 | ...
143 |
144 | def people(self, ids):
145 | results = []
146 | for id in ids:
147 |             if id in self.people_map:
148 |                 results.append(self.people_map[id])
149 | return results
150 | ```
151 |
152 | **Person** implementation:
153 |
154 | ```
155 | class Person(object):
156 |
157 | def __init__(self, id, name, friend_ids):
158 | self.id = id
159 | self.name = name
160 | self.friend_ids = friend_ids
161 | ```
162 |
163 | **User Graph Service** implementation:
164 |
165 | ```
166 | class UserGraphService(object):
167 |
168 | def __init__(self, lookup_service):
169 | self.lookup_service = lookup_service
170 |
171 | def person(self, person_id):
172 | person_server = self.lookup_service.lookup_person_server(person_id)
173 |         return next(iter(person_server.people([person_id])), None)  # unwrap the single matching person
174 |
175 | def shortest_path(self, source_key, dest_key):
176 | if source_key is None or dest_key is None:
177 | return None
178 |         if source_key == dest_key:
179 | return [source_key]
180 | prev_node_keys = self._shortest_path(source_key, dest_key)
181 | if prev_node_keys is None:
182 | return None
183 | else:
184 | # Iterate through the path_ids backwards, starting at dest_key
185 | path_ids = [dest_key]
186 | prev_node_key = prev_node_keys[dest_key]
187 | while prev_node_key is not None:
188 | path_ids.append(prev_node_key)
189 | prev_node_key = prev_node_keys[prev_node_key]
190 | # Reverse the list since we iterated backwards
191 | return path_ids[::-1]
192 |
193 |     def _shortest_path(self, source_key, dest_key):
194 | # Use the id to get the Person
195 | source = self.person(source_key)
196 |         # Initialize the BFS queue with the source person
197 | queue = deque()
198 | queue.append(source)
199 | # prev_node_keys keeps track of each hop from
200 | # the source_key to the dest_key
201 | prev_node_keys = {source_key: None}
202 | # We'll use visited_ids to keep track of which nodes we've
203 | # visited, which can be different from a typical bfs where
204 | # this can be stored in the node itself
205 | visited_ids = set()
206 | visited_ids.add(source.id)
207 | while queue:
208 | node = queue.popleft()
209 |             if node.id == dest_key:
210 | return prev_node_keys
211 | prev_node = node
212 | for friend_id in node.friend_ids:
213 | if friend_id not in visited_ids:
214 | friend_node = self.person(friend_id)
215 | queue.append(friend_node)
216 |                     prev_node_keys[friend_id] = prev_node.id
217 | visited_ids.add(friend_id)
218 | return None
219 | ```
220 |
221 | We'll use a public [**REST API**](https://github.com/donnemartin/system-design-primer#representational-state-transfer-rest):
222 |
223 | ```
224 | $ curl https://social.com/api/v1/friend_search?person_id=1234
225 | ```
226 |
227 | Response:
228 |
229 | ```
230 | {
231 | "person_id": "100",
232 | "name": "foo",
233 | "link": "https://social.com/foo",
234 | },
235 | {
236 | "person_id": "53",
237 | "name": "bar",
238 | "link": "https://social.com/bar",
239 | },
240 | {
241 | "person_id": "1234",
242 | "name": "baz",
243 | "link": "https://social.com/baz",
244 | },
245 | ```
246 |
247 | For internal communications, we could use [Remote Procedure Calls](https://github.com/donnemartin/system-design-primer#remote-procedure-call-rpc).
248 |
249 | ## Step 4: Scale the design
250 |
251 | > Identify and address bottlenecks, given the constraints.
252 |
253 | 
254 |
255 | **Important: Do not simply jump right into the final design from the initial design!**
256 |
257 | State you would 1) **Benchmark/Load Test**, 2) **Profile** for bottlenecks, 3) address bottlenecks while evaluating alternatives and trade-offs, and 4) repeat.  See [Design a system that scales to millions of users on AWS](https://github.com/donnemartin/system-design-primer/blob/master/solutions/system_design/scaling_aws/README.md) as a sample of how to iteratively scale the initial design.
258 |
259 | It's important to discuss what bottlenecks you might encounter with the initial design and how you might address each of them. For example, what issues are addressed by adding a **Load Balancer** with multiple **Web Servers**? **CDN**? **Master-Slave Replicas**? What are the alternatives and **Trade-Offs** for each?
260 |
261 | We'll introduce some components to complete the design and to address scalability issues. Internal load balancers are not shown to reduce clutter.
262 |
263 | *To avoid repeating discussions*, refer to the following [system design topics](https://github.com/donnemartin/system-design-primer#index-of-system-design-topics) for main talking points, tradeoffs, and alternatives:
264 |
265 | * [DNS](https://github.com/donnemartin/system-design-primer#domain-name-system)
266 | * [Load balancer](https://github.com/donnemartin/system-design-primer#load-balancer)
267 | * [Horizontal scaling](https://github.com/donnemartin/system-design-primer#horizontal-scaling)
268 | * [Web server (reverse proxy)](https://github.com/donnemartin/system-design-primer#reverse-proxy-web-server)
269 | * [API server (application layer)](https://github.com/donnemartin/system-design-primer#application-layer)
270 | * [Cache](https://github.com/donnemartin/system-design-primer#cache)
271 | * [Consistency patterns](https://github.com/donnemartin/system-design-primer#consistency-patterns)
272 | * [Availability patterns](https://github.com/donnemartin/system-design-primer#availability-patterns)
273 |
274 | To address the constraint of 400 *average* read requests per second (higher at peak), person data can be served from a **Memory Cache** such as Redis or Memcached to reduce response times and to reduce traffic to downstream services. This could be especially useful for people who do multiple searches in succession and for people who are well-connected. Reading 1 MB sequentially from memory takes about 250 microseconds, while reading from SSD takes 4x and from disk takes 80x longer.<sup><a href=https://github.com/donnemartin/system-design-primer#latency-numbers-every-programmer-should-know>1</a></sup>
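
As a sketch of this caching approach, here is a minimal cache-aside lookup assuming a local Redis instance; `serialize` and `deserialize` are hypothetical helpers for person records:

```
import redis

cache = redis.Redis()  # assumed Memory Cache instance
PERSON_TTL_SECONDS = 60 * 60  # hypothetical expiry to bound staleness

def get_person_cached(user_graph_service, person_id):
    """Cache-aside: check the Memory Cache first, fall back to the Person Servers."""
    key = 'person:{0}'.format(person_id)
    cached = cache.get(key)
    if cached is not None:
        return deserialize(cached)  # hypothetical helper
    person = user_graph_service.person(person_id)
    if person is not None:
        cache.setex(key, PERSON_TTL_SECONDS, serialize(person))  # hypothetical helper
    return person
```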
275 |
276 | Below are further optimizations:
277 |
278 | * Store complete or partial BFS traversals to speed up subsequent lookups in the **Memory Cache**
279 | * Batch compute offline then store complete or partial BFS traversals to speed up subsequent lookups in a **NoSQL Database**
280 | * Reduce machine jumps by batching together friend lookups hosted on the same **Person Server** (see the batching sketch after this list)
281 | * [Shard](https://github.com/donnemartin/system-design-primer#sharding) **Person Servers** by location to further improve this, as friends generally live closer to each other
282 | * Do two BFS searches at the same time, one starting from the source, and one from the destination, then merge the two paths
283 | * Start the BFS search from people with large numbers of friends, as they are more likely to reduce the number of [degrees of separation](https://en.wikipedia.org/wiki/Six_degrees_of_separation) between the current user and the search target
284 | * Set a limit based on time or number of hops before asking the user if they want to continue searching, as searching could take a considerable amount of time in some cases
285 | * Use a **Graph Database** such as [Neo4j](https://neo4j.com/) and its graph query language [Cypher](https://neo4j.com/developer/cypher/) (if there were no constraint preventing the use of **Graph Databases**)
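
A minimal sketch of the batching optimization above, grouping friend ids by the **Person Server** that hosts them so each server is contacted only once:

```
from collections import defaultdict

def batch_get_people(lookup_service, person_ids):
    """Group ids by Person Server, then issue one get_people call per server."""
    ids_by_server = defaultdict(list)
    for person_id in person_ids:
        server = lookup_service.lookup_person_server(person_id)
        ids_by_server[server].append(person_id)
    people = []
    for server, ids in ids_by_server.items():
        people.extend(server.get_people(ids))  # one machine jump per server
    return people
```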
286 |
287 | ## Additional talking points
288 |
289 | > Additional topics to dive into, depending on the problem scope and time remaining.
290 |
291 | ### SQL scaling patterns
292 |
293 | * [Read replicas](https://github.com/donnemartin/system-design-primer#master-slave)
294 | * [Federation](https://github.com/donnemartin/system-design-primer#federation)
295 | * [Sharding](https://github.com/donnemartin/system-design-primer#sharding)
296 | * [Denormalization](https://github.com/donnemartin/system-design-primer#denormalization)
297 | * [SQL Tuning](https://github.com/donnemartin/system-design-primer#sql-tuning)
298 |
299 | #### NoSQL
300 |
301 | * [Key-value store](https://github.com/donnemartin/system-design-primer#key-value-store)
302 | * [Document store](https://github.com/donnemartin/system-design-primer#document-store)
303 | * [Wide column store](https://github.com/donnemartin/system-design-primer#wide-column-store)
304 | * [Graph database](https://github.com/donnemartin/system-design-primer#graph-database)
305 | * [SQL vs NoSQL](https://github.com/donnemartin/system-design-primer#sql-or-nosql)
306 |
307 | ### Caching
308 |
309 | * Where to cache
310 | * [Client caching](https://github.com/donnemartin/system-design-primer#client-caching)
311 | * [CDN caching](https://github.com/donnemartin/system-design-primer#cdn-caching)
312 | * [Web server caching](https://github.com/donnemartin/system-design-primer#web-server-caching)
313 | * [Database caching](https://github.com/donnemartin/system-design-primer#database-caching)
314 | * [Application caching](https://github.com/donnemartin/system-design-primer#application-caching)
315 | * What to cache
316 | * [Caching at the database query level](https://github.com/donnemartin/system-design-primer#caching-at-the-database-query-level)
317 | * [Caching at the object level](https://github.com/donnemartin/system-design-primer#caching-at-the-object-level)
318 | * When to update the cache
319 | * [Cache-aside](https://github.com/donnemartin/system-design-primer#cache-aside)
320 | * [Write-through](https://github.com/donnemartin/system-design-primer#write-through)
321 | * [Write-behind (write-back)](https://github.com/donnemartin/system-design-primer#write-behind-write-back)
322 | * [Refresh ahead](https://github.com/donnemartin/system-design-primer#refresh-ahead)
323 |
324 | ### Asynchronism and microservices
325 |
326 | * [Message queues](https://github.com/donnemartin/system-design-primer#message-queues)
327 | * [Task queues](https://github.com/donnemartin/system-design-primer#task-queues)
328 | * [Back pressure](https://github.com/donnemartin/system-design-primer#back-pressure)
329 | * [Microservices](https://github.com/donnemartin/system-design-primer#microservices)
330 |
331 | ### Communications
332 |
333 | * Discuss tradeoffs:
334 | * External communication with clients - [HTTP APIs following REST](https://github.com/donnemartin/system-design-primer#representational-state-transfer-rest)
335 | * Internal communications - [RPC](https://github.com/donnemartin/system-design-primer#remote-procedure-call-rpc)
336 | * [Service discovery](https://github.com/donnemartin/system-design-primer#service-discovery)
337 |
338 | ### Security
339 |
340 | Refer to the [security section](https://github.com/donnemartin/system-design-primer#security).
341 |
342 | ### Latency numbers
343 |
344 | See [Latency numbers every programmer should know](https://github.com/donnemartin/system-design-primer#latency-numbers-every-programmer-should-know).
345 |
346 | ### Ongoing
347 |
348 | * Continue benchmarking and monitoring your system to address bottlenecks as they come up
349 | * Scaling is an iterative process
350 |
--------------------------------------------------------------------------------
/solutions/system_design/social_graph/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mutablealligator/system-design-primer/0889f04f0d8162b6513437d3171a8ebb9ef5e504/solutions/system_design/social_graph/__init__.py
--------------------------------------------------------------------------------
/solutions/system_design/social_graph/social_graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mutablealligator/system-design-primer/0889f04f0d8162b6513437d3171a8ebb9ef5e504/solutions/system_design/social_graph/social_graph.png
--------------------------------------------------------------------------------
/solutions/system_design/social_graph/social_graph_basic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mutablealligator/system-design-primer/0889f04f0d8162b6513437d3171a8ebb9ef5e504/solutions/system_design/social_graph/social_graph_basic.png
--------------------------------------------------------------------------------
/solutions/system_design/social_graph/social_graph_snippets.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
from collections import deque
from enum import Enum


class State(Enum):

    unvisited = 0
    visited = 1


3 | class Graph(object):
4 |
5 | def bfs(self, source, dest):
6 | if source is None:
7 | return False
8 | queue = deque()
9 | queue.append(source)
10 | source.visit_state = State.visited
11 | while queue:
12 | node = queue.popleft()
13 | print(node)
14 | if dest is node:
15 | return True
16 | for adjacent_node in node.adj_nodes.values():
17 | if adjacent_node.visit_state == State.unvisited:
18 | queue.append(adjacent_node)
19 | adjacent_node.visit_state = State.visited
20 | return False
21 |
22 |
23 | class Person(object):
24 |
25 | def __init__(self, id, name):
26 | self.id = id
27 | self.name = name
28 | self.friend_ids = []
29 |
30 |
31 | class LookupService(object):
32 |
33 | def __init__(self):
34 | self.lookup = {} # key: person_id, value: person_server
35 |
36 | def get_person(self, person_id):
37 | person_server = self.lookup[person_id]
38 | return person_server.people[person_id]
39 |
40 |
41 | class PersonServer(object):
42 |
43 | def __init__(self):
44 | self.people = {} # key: person_id, value: person
45 |
46 | def get_people(self, ids):
47 | results = []
48 | for id in ids:
49 | if id in self.people:
50 | results.append(self.people[id])
51 | return results
52 |
53 |
54 | class UserGraphService(object):
55 |
56 | def __init__(self, person_ids, lookup):
57 | self.lookup = lookup
58 | self.person_ids = person_ids
59 | self.visited_ids = set()
60 |
61 |     def bfs(self, source, dest):
62 |         # Use self.visited_ids to track visited nodes
63 |         # Use self.lookup to translate a person_id to a Person
        if source is None:
            return False
        queue = deque([source])
        self.visited_ids.add(source.id)
        while queue:
            person = queue.popleft()
            if person.id == dest.id:
                return True
            for friend_id in person.friend_ids:
                if friend_id not in self.visited_ids:
                    self.visited_ids.add(friend_id)
                    queue.append(self.lookup.get_person(friend_id))
        return False
64 |
--------------------------------------------------------------------------------
/solutions/system_design/twitter/README.md:
--------------------------------------------------------------------------------
1 | # Design the Twitter timeline and search
2 |
3 | *Note: This document links directly to relevant areas found in the [system design topics](https://github.com/donnemartin/system-design-primer#index-of-system-design-topics) to avoid duplication. Refer to the linked content for general talking points, tradeoffs, and alternatives.*
4 |
5 | **Design the Facebook feed** and **Design Facebook search** are similar questions.
6 |
7 | ## Step 1: Outline use cases and constraints
8 |
9 | > Gather requirements and scope the problem.
10 | > Ask questions to clarify use cases and constraints.
11 | > Discuss assumptions.
12 |
13 | Without an interviewer to address clarifying questions, we'll define some use cases and constraints.
14 |
15 | ### Use cases
16 |
17 | #### We'll scope the problem to handle only the following use cases
18 |
19 | * **User** posts a tweet
20 | * **Service** pushes tweets to followers, sending push notifications and emails
21 | * **User** views the user timeline (activity from the user)
22 | * **User** views the home timeline (activity from people the user is following)
23 | * **User** searches keywords
24 | * **Service** has high availability
25 |
26 | #### Out of scope
27 |
28 | * **Service** pushes tweets to the Twitter Firehose and other streams
29 | * **Service** strips out tweets based on user's visibility settings
30 | * Hide @reply if the user is not also following the person being replied to
31 | * Respect 'hide retweets' setting
32 | * Analytics
33 |
34 | ### Constraints and assumptions
35 |
36 | #### State assumptions
37 |
38 | General
39 |
40 | * Traffic is not evenly distributed
41 | * Posting a tweet should be fast
42 | * Fanning out a tweet to all of your followers should be fast, unless you have millions of followers
43 | * 100 million active users
44 | * 500 million tweets per day or 15 billion tweets per month
45 | * Each tweet averages a fanout of 10 deliveries
46 | * 5 billion total tweets delivered on fanout per day
47 | * 150 billion tweets delivered on fanout per month
48 | * 250 billion read requests per month
49 | * 10 billion searches per month
50 |
51 | Timeline
52 |
53 | * Viewing the timeline should be fast
54 | * Twitter is more read heavy than write heavy
55 | * Optimize for fast reads of tweets
56 | * Ingesting tweets is write heavy
57 |
58 | Search
59 |
60 | * Searching should be fast
61 | * Search is read-heavy
62 |
63 | #### Calculate usage
64 |
65 | **Clarify with your interviewer if you should run back-of-the-envelope usage calculations.**
66 |
67 | * Size per tweet:
68 | * `tweet_id` - 8 bytes
69 | * `user_id` - 32 bytes
70 | * `text` - 140 bytes
71 | * `media` - 10 KB average
72 | * Total: ~10 KB
73 | * 150 TB of new tweet content per month
74 | * 10 KB per tweet * 500 million tweets per day * 30 days per month
75 | * 5.4 PB of new tweet content in 3 years
76 | * 100 thousand read requests per second
77 | * 250 billion read requests per month * (400 requests per second / 1 billion requests per month)
78 | * 6,000 tweets per second
79 |     * 15 billion tweets per month * (400 requests per second / 1 billion requests per month)
80 | * 60 thousand tweets delivered on fanout per second
81 | * 150 billion tweets delivered on fanout per month * (400 requests per second / 1 billion requests per month)
82 | * 4,000 search requests per second
83 |
84 | Handy conversion guide:
85 |
86 | * 2.5 million seconds per month
87 | * 1 request per second = 2.5 million requests per month
88 | * 40 requests per second = 100 million requests per month
89 | * 400 requests per second = 1 billion requests per month
90 |
91 | ## Step 2: Create a high level design
92 |
93 | > Outline a high level design with all important components.
94 |
95 | 
96 |
97 | ## Step 3: Design core components
98 |
99 | > Dive into details for each core component.
100 |
101 | ### Use case: User posts a tweet
102 |
103 | We could store the user's own tweets to populate the user timeline (activity from the user) in a [relational database](https://github.com/donnemartin/system-design-primer#relational-database-management-system-rdbms). We should discuss the [use cases and tradeoffs between choosing SQL or NoSQL](https://github.com/donnemartin/system-design-primer#sql-or-nosql).
104 |
105 | Delivering tweets and building the home timeline (activity from people the user is following) is trickier.  Fanning out tweets to all followers (60 thousand tweets delivered on fanout per second) will overload a traditional [relational database](https://github.com/donnemartin/system-design-primer#relational-database-management-system-rdbms).  We'll probably want to choose a data store with fast writes such as a **NoSQL database** or **Memory Cache**.  Reading 1 MB sequentially from memory takes about 250 microseconds, while reading from SSD takes 4x and from disk takes 80x longer.<sup><a href=https://github.com/donnemartin/system-design-primer#latency-numbers-every-programmer-should-know>1</a></sup>
106 |
107 | We could store media such as photos or videos on an **Object Store**.
108 |
109 | * The **Client** posts a tweet to the **Web Server**, running as a [reverse proxy](https://github.com/donnemartin/system-design-primer#reverse-proxy-web-server)
110 | * The **Web Server** forwards the request to the **Write API** server
111 | * The **Write API** stores the tweet in the user's timeline on a **SQL database**
112 | * The **Write API** contacts the **Fan Out Service**, which does the following:
113 | * Queries the **User Graph Service** to find the user's followers stored in the **Memory Cache**
114 | * Stores the tweet in the *home timeline of the user's followers* in a **Memory Cache**
115 | * O(n) operation: 1,000 followers = 1,000 lookups and inserts
116 | * Stores the tweet in the **Search Index Service** to enable fast searching
117 | * Stores media in the **Object Store**
118 | * Uses the **Notification Service** to send out push notifications to followers:
119 | * Uses a **Queue** (not pictured) to asynchronously send out notifications
120 |
121 | **Clarify with your interviewer how much code you are expected to write**.
122 |
123 | If our **Memory Cache** is Redis, we could use a native Redis list with the following structure:
124 |
125 | ```
126 |            tweet n+2                   tweet n+1                   tweet n
127 | | 8 bytes   8 bytes  1 byte | 8 bytes   8 bytes  1 byte | 8 bytes   8 bytes  1 byte |
128 | | tweet_id  user_id  meta   | tweet_id  user_id  meta   | tweet_id  user_id  meta   |
129 | ```
130 |
131 | The new tweet would be placed in the **Memory Cache**, which populates the user's home timeline (activity from people the user is following).
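
A minimal fanout sketch under these assumptions, using simplified `tweet_id:user_id` string entries in place of the packed layout above (key names are hypothetical):

```
import redis

cache = redis.Redis()  # assumed Memory Cache instance

def fan_out_tweet(tweet_id, user_id, follower_ids, max_timeline_len=800):
    """Prepend the tweet to each follower's home timeline and cap its length."""
    entry = '{0}:{1}'.format(tweet_id, user_id)
    pipe = cache.pipeline()
    for follower_id in follower_ids:
        key = 'home_timeline:{0}'.format(follower_id)
        pipe.lpush(key, entry)
        pipe.ltrim(key, 0, max_timeline_len - 1)  # keep only the most recent entries
    pipe.execute()
```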
132 |
133 | We'll use a public [**REST API**](https://github.com/donnemartin/system-design-primer#representational-state-transfer-rest):
134 |
135 | ```
136 | $ curl -X POST --data '{ "user_id": "123", "auth_token": "ABC123", \
137 | "status": "hello world!", "media_ids": "ABC987" }' \
138 | https://twitter.com/api/v1/tweet
139 | ```
140 |
141 | Response:
142 |
143 | ```
144 | {
145 | "created_at": "Wed Sep 05 00:37:15 +0000 2012",
146 | "status": "hello world!",
147 | "tweet_id": "987",
148 | "user_id": "123",
149 | ...
150 | }
151 | ```
152 |
153 | For internal communications, we could use [Remote Procedure Calls](https://github.com/donnemartin/system-design-primer#remote-procedure-call-rpc).
154 |
155 | ### Use case: User views the home timeline
156 |
157 | * The **Client** posts a home timeline request to the **Web Server**
158 | * The **Web Server** forwards the request to the **Read API** server
159 | * The **Read API** server contacts the **Timeline Service**, which does the following:
160 | * Gets the timeline data stored in the **Memory Cache**, containing tweet ids and user ids - O(1)
161 | * Queries the **Tweet Info Service** with a [multiget](http://redis.io/commands/mget) to obtain additional info about the tweet ids - O(n)
162 |     * Queries the **User Info Service** with a multiget to obtain additional info about the user ids - O(n) (see the sketch below)
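
A sketch of this read path, collapsing the **Tweet Info Service** and **User Info Service** into Redis multigets for brevity and reusing the hypothetical key layout from the fanout sketch earlier:

```
import redis

cache = redis.Redis()  # assumed Memory Cache instance

def read_home_timeline(user_id, count=40):
    """Read timeline entries, then hydrate tweet and user info with multigets."""
    entries = cache.lrange('home_timeline:{0}'.format(user_id), 0, count - 1)
    pairs = [entry.decode().split(':') for entry in entries]  # [tweet_id, user_id]
    if not pairs:
        return []
    tweet_infos = cache.mget(['tweet_info:{0}'.format(t) for t, _ in pairs])  # O(n)
    user_infos = cache.mget(['user_info:{0}'.format(u) for _, u in pairs])  # O(n)
    return list(zip(tweet_infos, user_infos))
```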
163 |
164 | REST API:
165 |
166 | ```
167 | $ curl https://twitter.com/api/v1/home_timeline?user_id=123
168 | ```
169 |
170 | Response:
171 |
172 | ```
173 | {
174 | "user_id": "456",
175 | "tweet_id": "123",
176 | "status": "foo"
177 | },
178 | {
179 | "user_id": "789",
180 | "tweet_id": "456",
181 | "status": "bar"
182 | },
183 | {
184 | "user_id": "789",
185 | "tweet_id": "579",
186 | "status": "baz"
187 | },
188 | ```
189 |
190 | ### Use case: User views the user timeline
191 |
192 | * The **Client** posts a user timeline request to the **Web Server**
193 | * The **Web Server** forwards the request to the **Read API** server
194 | * The **Read API** retrieves the user timeline from the **SQL Database**
195 |
196 | The REST API would be similar to the home timeline, except all tweets would come from the user as opposed to the people the user is following.
197 |
198 | ### Use case: User searches keywords
199 |
200 | * The **Client** sends a search request to the **Web Server**
201 | * The **Web Server** forwards the request to the **Search API** server
202 | * The **Search API** contacts the **Search Service**, which does the following:
203 | * Parses/tokenizes the input query, determining what needs to be searched
204 | * Removes markup
205 | * Breaks up the text into terms
206 | * Fixes typos
207 | * Normalizes capitalization
208 | * Converts the query to use boolean operations
209 |     * Queries the **Search Cluster** (e.g., [Lucene](https://lucene.apache.org/)) for the results:
210 |     * [Scatter gathers](https://github.com/donnemartin/system-design-primer#under-development) each server in the cluster to determine if there are any results for the query (see the sketch after this list)
211 | * Merges, ranks, sorts, and returns the results
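
A minimal scatter-gather sketch for the query step above, assuming each shard exposes a hypothetical `search(query)` method returning `(score, doc)` tuples:

```
from concurrent.futures import ThreadPoolExecutor

def scatter_gather(shards, query, top_n=20):
    """Query every shard in parallel, then merge, rank, and return the top results."""
    with ThreadPoolExecutor(max_workers=len(shards)) as executor:
        results_per_shard = executor.map(lambda shard: shard.search(query), shards)
    merged = [hit for results in results_per_shard for hit in results]
    merged.sort(key=lambda hit: hit[0], reverse=True)  # rank by score
    return merged[:top_n]
```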
212 |
213 | REST API:
214 |
215 | ```
216 | $ curl https://twitter.com/api/v1/search?query=hello+world
217 | ```
218 |
219 | The response would be similar to that of the home timeline, except for tweets matching the given query.
220 |
221 | ## Step 4: Scale the design
222 |
223 | > Identify and address bottlenecks, given the constraints.
224 |
225 | 
226 |
227 | **Important: Do not simply jump right into the final design from the initial design!**
228 |
229 | State you would 1) **Benchmark/Load Test**, 2) **Profile** for bottlenecks, 3) address bottlenecks while evaluating alternatives and trade-offs, and 4) repeat.  See [Design a system that scales to millions of users on AWS](https://github.com/donnemartin/system-design-primer/blob/master/solutions/system_design/scaling_aws/README.md) as a sample of how to iteratively scale the initial design.
230 |
231 | It's important to discuss what bottlenecks you might encounter with the initial design and how you might address each of them. For example, what issues are addressed by adding a **Load Balancer** with multiple **Web Servers**? **CDN**? **Master-Slave Replicas**? What are the alternatives and **Trade-Offs** for each?
232 |
233 | We'll introduce some components to complete the design and to address scalability issues. Internal load balancers are not shown to reduce clutter.
234 |
235 | *To avoid repeating discussions*, refer to the following [system design topics](https://github.com/donnemartin/system-design-primer#index-of-system-design-topics) for main talking points, tradeoffs, and alternatives:
236 |
237 | * [DNS](https://github.com/donnemartin/system-design-primer#domain-name-system)
238 | * [CDN](https://github.com/donnemartin/system-design-primer#content-delivery-network)
239 | * [Load balancer](https://github.com/donnemartin/system-design-primer#load-balancer)
240 | * [Horizontal scaling](https://github.com/donnemartin/system-design-primer#horizontal-scaling)
241 | * [Web server (reverse proxy)](https://github.com/donnemartin/system-design-primer#reverse-proxy-web-server)
242 | * [API server (application layer)](https://github.com/donnemartin/system-design-primer#application-layer)
243 | * [Cache](https://github.com/donnemartin/system-design-primer#cache)
244 | * [Relational database management system (RDBMS)](https://github.com/donnemartin/system-design-primer#relational-database-management-system-rdbms)
245 | * [SQL write master-slave failover](https://github.com/donnemartin/system-design-primer#fail-over)
246 | * [Master-slave replication](https://github.com/donnemartin/system-design-primer#master-slave-replication)
247 | * [Consistency patterns](https://github.com/donnemartin/system-design-primer#consistency-patterns)
248 | * [Availability patterns](https://github.com/donnemartin/system-design-primer#availability-patterns)
249 |
250 | The **Fanout Service** is a potential bottleneck. Twitter users with millions of followers could take several minutes to have their tweets go through the fanout process. This could lead to race conditions with @replies to the tweet, which we could mitigate by re-ordering the tweets at serve time.
251 |
252 | We could also avoid fanning out tweets from highly-followed users.  Instead, we could search to find tweets from highly-followed users, merge the search results with the user's home timeline results, then re-order the tweets at serve time.
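
A sketch of this read-time merge, assuming a hypothetical `tweet_store` that can return recent tweets for the highly-followed accounts a user follows:

```
def build_home_timeline(cached_timeline, followed_celebrity_ids, tweet_store, limit=200):
    """Merge the precomputed timeline with on-demand reads for highly-followed accounts."""
    celebrity_tweets = []
    for celebrity_id in followed_celebrity_ids:
        celebrity_tweets.extend(tweet_store.recent_tweets(celebrity_id))  # hypothetical call
    merged = list(cached_timeline) + celebrity_tweets
    merged.sort(key=lambda tweet: tweet.created_at, reverse=True)  # re-order at serve time
    return merged[:limit]
```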
253 |
254 | Additional optimizations include:
255 |
256 | * Keep only several hundred tweets for each home timeline in the **Memory Cache**
257 | * Keep only active users' home timeline info in the **Memory Cache**
258 |     * If a user was not active in the past 30 days, we could rebuild the timeline from the **SQL Database**
259 | * Query the **User Graph Service** to determine who the user is following
260 | * Get the tweets from the **SQL Database** and add them to the **Memory Cache**
261 | * Store only a month of tweets in the **Tweet Info Service**
262 | * Store only active users in the **User Info Service**
263 | * The **Search Cluster** would likely need to keep the tweets in memory to keep latency low
264 |
265 | We'll also want to address the bottleneck with the **SQL Database**.
266 |
267 | Although the **Memory Cache** should reduce the load on the database, it is unlikely the **SQL Read Replicas** alone would be enough to handle the cache misses. We'll probably need to employ additional SQL scaling patterns.
268 |
269 | The high volume of writes would overwhelm a single **SQL Write Master-Slave**, also pointing to a need for additional scaling techniques.
270 |
271 | * [Federation](https://github.com/donnemartin/system-design-primer#federation)
272 | * [Sharding](https://github.com/donnemartin/system-design-primer#sharding)
273 | * [Denormalization](https://github.com/donnemartin/system-design-primer#denormalization)
274 | * [SQL Tuning](https://github.com/donnemartin/system-design-primer#sql-tuning)
275 |
276 | We should also consider moving some data to a **NoSQL Database**.
277 |
278 | ## Additional talking points
279 |
280 | > Additional topics to dive into, depending on the problem scope and time remaining.
281 |
### SQL scaling patterns

* [Read replicas](https://github.com/donnemartin/system-design-primer#master-slave)
* [Federation](https://github.com/donnemartin/system-design-primer#federation)
* [Sharding](https://github.com/donnemartin/system-design-primer#sharding)
* [Denormalization](https://github.com/donnemartin/system-design-primer#denormalization)
* [SQL Tuning](https://github.com/donnemartin/system-design-primer#sql-tuning)

282 | #### NoSQL
283 |
284 | * [Key-value store](https://github.com/donnemartin/system-design-primer#key-value-store)
285 | * [Document store](https://github.com/donnemartin/system-design-primer#document-store)
286 | * [Wide column store](https://github.com/donnemartin/system-design-primer#wide-column-store)
287 | * [Graph database](https://github.com/donnemartin/system-design-primer#graph-database)
288 | * [SQL vs NoSQL](https://github.com/donnemartin/system-design-primer#sql-or-nosql)
289 |
290 | ### Caching
291 |
292 | * Where to cache
293 | * [Client caching](https://github.com/donnemartin/system-design-primer#client-caching)
294 | * [CDN caching](https://github.com/donnemartin/system-design-primer#cdn-caching)
295 | * [Web server caching](https://github.com/donnemartin/system-design-primer#web-server-caching)
296 | * [Database caching](https://github.com/donnemartin/system-design-primer#database-caching)
297 | * [Application caching](https://github.com/donnemartin/system-design-primer#application-caching)
298 | * What to cache
299 | * [Caching at the database query level](https://github.com/donnemartin/system-design-primer#caching-at-the-database-query-level)
300 | * [Caching at the object level](https://github.com/donnemartin/system-design-primer#caching-at-the-object-level)
301 | * When to update the cache
302 | * [Cache-aside](https://github.com/donnemartin/system-design-primer#cache-aside)
303 | * [Write-through](https://github.com/donnemartin/system-design-primer#write-through)
304 | * [Write-behind (write-back)](https://github.com/donnemartin/system-design-primer#write-behind-write-back)
305 | * [Refresh ahead](https://github.com/donnemartin/system-design-primer#refresh-ahead)
306 |
307 | ### Asynchronism and microservices
308 |
309 | * [Message queues](https://github.com/donnemartin/system-design-primer#message-queues)
310 | * [Task queues](https://github.com/donnemartin/system-design-primer#task-queues)
311 | * [Back pressure](https://github.com/donnemartin/system-design-primer#back-pressure)
312 | * [Microservices](https://github.com/donnemartin/system-design-primer#microservices)
313 |
314 | ### Communications
315 |
316 | * Discuss tradeoffs:
317 | * External communication with clients - [HTTP APIs following REST](https://github.com/donnemartin/system-design-primer#representational-state-transfer-rest)
318 | * Internal communications - [RPC](https://github.com/donnemartin/system-design-primer#remote-procedure-call-rpc)
319 | * [Service discovery](https://github.com/donnemartin/system-design-primer#service-discovery)
320 |
321 | ### Security
322 |
323 | Refer to the [security section](https://github.com/donnemartin/system-design-primer#security).
324 |
325 | ### Latency numbers
326 |
327 | See [Latency numbers every programmer should know](https://github.com/donnemartin/system-design-primer#latency-numbers-every-programmer-should-know).
328 |
329 | ### Ongoing
330 |
331 | * Continue benchmarking and monitoring your system to address bottlenecks as they come up
332 | * Scaling is an iterative process
333 |
--------------------------------------------------------------------------------
/solutions/system_design/twitter/twitter.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mutablealligator/system-design-primer/0889f04f0d8162b6513437d3171a8ebb9ef5e504/solutions/system_design/twitter/twitter.png
--------------------------------------------------------------------------------
/solutions/system_design/twitter/twitter_basic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mutablealligator/system-design-primer/0889f04f0d8162b6513437d3171a8ebb9ef5e504/solutions/system_design/twitter/twitter_basic.png
--------------------------------------------------------------------------------
/solutions/system_design/web_crawler/README.md:
--------------------------------------------------------------------------------
1 | # Design a web crawler
2 |
3 | *Note: This document links directly to relevant areas found in the [system design topics](https://github.com/donnemartin/system-design-primer#index-of-system-design-topics) to avoid duplication. Refer to the linked content for general talking points, tradeoffs, and alternatives.*
4 |
5 | ## Step 1: Outline use cases and constraints
6 |
7 | > Gather requirements and scope the problem.
8 | > Ask questions to clarify use cases and constraints.
9 | > Discuss assumptions.
10 |
11 | Without an interviewer to address clarifying questions, we'll define some use cases and constraints.
12 |
13 | ### Use cases
14 |
15 | #### We'll scope the problem to handle only the following use cases
16 |
17 | * **Service** crawls a list of urls:
18 | * Generates reverse index of words to pages containing the search terms
19 | * Generates titles and snippets for pages
20 | * Title and snippets are static, they do not change based on search query
21 | * **User** inputs a search term and sees a list of relevant pages with titles and snippets the crawler generated
22 | * Only sketch high level components and interactions for this use case, no need to go into depth
23 | * **Service** has high availability
24 |
25 | #### Out of scope
26 |
27 | * Search analytics
28 | * Personalized search results
29 | * Page rank
30 |
31 | ### Constraints and assumptions
32 |
33 | #### State assumptions
34 |
35 | * Traffic is not evenly distributed
36 | * Some searches are very popular, while others are only executed once
37 | * Support only anonymous users
38 | * Generating search results should be fast
39 | * The web crawler should not get stuck in an infinite loop
40 | * We get stuck in an infinite loop if the graph contains a cycle
41 | * 1 billion links to crawl
42 | * Pages need to be crawled regularly to ensure freshness
43 | * Average refresh rate of about once per week, more frequent for popular sites
44 | * 4 billion links crawled each month
45 | * Average stored size per web page: 500 KB
46 | * For simplicity, count changes the same as new pages
47 | * 100 billion searches per month
48 |
49 | Exercise the use of more traditional systems - don't use existing systems such as [Solr](http://lucene.apache.org/solr/) or [Nutch](http://nutch.apache.org/).
50 |
51 | #### Calculate usage
52 |
53 | **Clarify with your interviewer if you should run back-of-the-envelope usage calculations.**
54 |
55 | * 2 PB of stored page content per month
56 | * 500 KB per page * 4 billion links crawled per month
57 | * 72 PB of stored page content in 3 years
58 | * 1,600 write requests per second
59 | * 40,000 search requests per second
60 |
61 | Handy conversion guide:
62 |
63 | * 2.5 million seconds per month
64 | * 1 request per second = 2.5 million requests per month
65 | * 40 requests per second = 100 million requests per month
66 | * 400 requests per second = 1 billion requests per month
67 |
68 | ## Step 2: Create a high level design
69 |
70 | > Outline a high level design with all important components.
71 |
72 | 
73 |
74 | ## Step 3: Design core components
75 |
76 | > Dive into details for each core component.
77 |
78 | ### Use case: Service crawls a list of urls
79 |
80 | We'll assume we have an initial list of `links_to_crawl`, ranked by overall site popularity.  If this is not a reasonable assumption, we can seed the crawler with popular sites that link to outside content, such as [Yahoo](https://www.yahoo.com/), [DMOZ](http://www.dmoz.org/), etc.
81 |
82 | We'll use a table `crawled_links` to store processed links and their page signatures.
83 |
84 | We could store `links_to_crawl` and `crawled_links` in a key-value **NoSQL Database**. For the ranked links in `links_to_crawl`, we could use [Redis](https://redis.io/) with sorted sets to maintain a ranking of page links. We should discuss the [use cases and tradeoffs between choosing SQL or NoSQL](https://github.com/donnemartin/system-design-primer#sql-or-nosql).
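
A minimal sketch of the ranked `links_to_crawl` frontier using Redis sorted sets (the key name and ranking scale are assumptions):

```
import redis

db = redis.Redis()  # assumed Redis instance
LINKS_TO_CRAWL = 'links_to_crawl'  # hypothetical sorted-set key

def add_link_to_crawl(url, rank):
    """Insert or update a link with its priority score."""
    db.zadd(LINKS_TO_CRAWL, {url: rank})

def extract_max_priority_link():
    """Pop the highest-ranked link, or return None when the frontier is empty."""
    popped = db.zpopmax(LINKS_TO_CRAWL)
    return popped[0][0].decode() if popped else None
```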
85 |
86 | * The **Crawler Service** processes each page link by doing the following in a loop:
87 | * Takes the top ranked page link to crawl
88 | * Checks `crawled_links` in the **NoSQL Database** for an entry with a similar page signature
89 | * If we have a similar page, reduces the priority of the page link
90 | * This prevents us from getting into a cycle
91 | * Continue
92 | * Else, crawls the link
93 | * Adds a job to the **Reverse Index Service** queue to generate a [reverse index](https://en.wikipedia.org/wiki/Search_engine_indexing)
94 | * Adds a job to the **Document Service** queue to generate a static title and snippet
95 | * Generates the page signature
96 | * Removes the link from `links_to_crawl` in the **NoSQL Database**
97 | * Inserts the page link and signature to `crawled_links` in the **NoSQL Database**
98 |
99 | **Clarify with your interviewer how much code you are expected to write**.
100 |
101 | `PagesDataStore` is an abstraction within the **Crawler Service** that uses the **NoSQL Database**:
102 |
103 | ```
104 | class PagesDataStore(object):
105 |
106 |     def __init__(self, db):
107 | self.db = db
108 | ...
109 |
110 | def add_link_to_crawl(self, url):
111 | """Add the given link to `links_to_crawl`."""
112 | ...
113 |
114 | def remove_link_to_crawl(self, url):
115 | """Remove the given link from `links_to_crawl`."""
116 | ...
117 |
118 |     def reduce_priority_link_to_crawl(self, url):
119 | """Reduce the priority of a link in `links_to_crawl` to avoid cycles."""
120 | ...
121 |
122 | def extract_max_priority_page(self):
123 | """Return the highest priority link in `links_to_crawl`."""
124 | ...
125 |
126 | def insert_crawled_link(self, url, signature):
127 | """Add the given link to `crawled_links`."""
128 | ...
129 |
130 | def crawled_similar(self, signature):
131 | """Determine if we've already crawled a page matching the given signature"""
132 | ...
133 | ```
134 |
135 | `Page` is an abstraction within the **Crawler Service** that encapsulates a page, its contents, child urls, and signature:
136 |
137 | ```
138 | class Page(object):
139 |
140 | def __init__(self, url, contents, child_urls, signature):
141 | self.url = url
142 | self.contents = contents
143 | self.child_urls = child_urls
144 | self.signature = signature
145 | ```
146 |
147 | `Crawler` is the main class within **Crawler Service**, composed of `Page` and `PagesDataStore`.
148 |
149 | ```
150 | class Crawler(object):
151 |
152 | def __init__(self, data_store, reverse_index_queue, doc_index_queue):
153 | self.data_store = data_store
154 | self.reverse_index_queue = reverse_index_queue
155 | self.doc_index_queue = doc_index_queue
156 |
157 | def create_signature(self, page):
158 | """Create signature based on url and contents."""
159 | ...
160 |
161 |     def crawl_page(self, page):
162 |         for url in page.child_urls:
163 |             self.data_store.add_link_to_crawl(url)
164 |         page.signature = self.create_signature(page)
        self.reverse_index_queue.generate(page)
        self.doc_index_queue.generate(page)
165 |         self.data_store.remove_link_to_crawl(page.url)
166 |         self.data_store.insert_crawled_link(page.url, page.signature)
167 |
168 | def crawl(self):
169 | while True:
170 | page = self.data_store.extract_max_priority_page()
171 | if page is None:
172 | break
173 | if self.data_store.crawled_similar(page.signature):
174 | self.data_store.reduce_priority_link_to_crawl(page.url)
175 | else:
176 | self.crawl_page(page)
177 | ```
178 |
179 | ### Handling duplicates
180 |
181 | We need to be careful the web crawler doesn't get stuck in an infinite loop, which happens when the graph contains a cycle.
182 |
183 | **Clarify with your interviewer how much code you are expected to write**.
184 |
185 | We'll want to remove duplicate urls:
186 |
187 | * For smaller lists we could use something like `sort | unique`
188 | * With 1 billion links to crawl, we could use **MapReduce** to output only entries that have a frequency of 1
189 |
190 | ```
191 | class RemoveDuplicateUrls(MRJob):
192 |
193 | def mapper(self, _, line):
194 | yield line, 1
195 |
196 | def reducer(self, key, values):
197 | total = sum(values)
198 | if total == 1:
199 | yield key, total
200 | ```
201 |
202 | Detecting duplicate content is more complex.  We could generate a signature based on the contents of each page and compare the two signatures for similarity.  Some potential algorithms are [Jaccard index](https://en.wikipedia.org/wiki/Jaccard_index) and [cosine similarity](https://en.wikipedia.org/wiki/Cosine_similarity).
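
As an illustration, a minimal Jaccard similarity check over word n-grams (the shingle size is an assumption):

```
def jaccard_similarity(contents_a, contents_b, shingle_size=3):
    """Compare two pages by the overlap of their word n-grams (shingles)."""
    def shingles(contents):
        words = contents.split()
        return {tuple(words[i:i + shingle_size])
                for i in range(len(words) - shingle_size + 1)}
    a, b = shingles(contents_a), shingles(contents_b)
    if not a or not b:
        return 0.0
    return len(a & b) / len(a | b)
```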
203 |
204 | ### Determining when to update the crawl results
205 |
206 | Pages need to be crawled regularly to ensure freshness. Crawl results could have a `timestamp` field that indicates the last time a page was crawled. After a default time period, say one week, all pages should be refreshed. Frequently updated or more popular sites could be refreshed in shorter intervals.
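
A sketch of the freshness check, assuming each crawl result carries a `timestamp` and a hypothetical per-site `refresh_interval`:

```
from datetime import datetime, timedelta

DEFAULT_REFRESH_INTERVAL = timedelta(weeks=1)

def needs_recrawl(timestamp, refresh_interval=DEFAULT_REFRESH_INTERVAL):
    """Return True if the page was last crawled longer ago than its refresh interval."""
    return datetime.utcnow() - timestamp > refresh_interval
```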
207 |
208 | Although we won't dive into details on analytics, we could do some data mining to determine the mean time before a particular page is updated, and use that statistic to determine how often to re-crawl the page.
209 |
210 | We might also choose to support a `robots.txt` file that gives webmasters control of crawl frequency.
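
One way to honor `robots.txt` with Python's standard library (the URLs and user agent are placeholders):

```
from urllib import robotparser

parser = robotparser.RobotFileParser()
parser.set_url('https://example.com/robots.txt')
parser.read()

if parser.can_fetch('MyCrawlerBot', 'https://example.com/some/page'):
    crawl_delay = parser.crawl_delay('MyCrawlerBot')  # seconds, or None if unspecified
    # fetch the page, waiting crawl_delay seconds between requests
```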
211 |
212 | ### Use case: User inputs a search term and sees a list of relevant pages with titles and snippets
213 |
214 | * The **Client** sends a request to the **Web Server**, running as a [reverse proxy](https://github.com/donnemartin/system-design-primer#reverse-proxy-web-server)
215 | * The **Web Server** forwards the request to the **Query API** server
216 | * The **Query API** server does the following:
217 | * Parses the query
218 | * Removes markup
219 | * Breaks up the text into terms
220 | * Fixes typos
221 | * Normalizes capitalization
222 | * Converts the query to use boolean operations
223 | * Uses the **Reverse Index Service** to find documents matching the query
224 | * The **Reverse Index Service** ranks the matching results and returns the top ones
225 | * Uses the **Document Service** to return titles and snippets
226 |
227 | We'll use a public [**REST API**](https://github.com/donnemartin/system-design-primer#representational-state-transfer-rest):
228 |
229 | ```
230 | $ curl https://search.com/api/v1/search?query=hello+world
231 | ```
232 |
233 | Response:
234 |
235 | ```
236 | {
237 | "title": "foo's title",
238 | "snippet": "foo's snippet",
239 | "link": "https://foo.com",
240 | },
241 | {
242 | "title": "bar's title",
243 | "snippet": "bar's snippet",
244 | "link": "https://bar.com",
245 | },
246 | {
247 | "title": "baz's title",
248 | "snippet": "baz's snippet",
249 | "link": "https://baz.com",
250 | },
251 | ```
252 |
253 | For internal communications, we could use [Remote Procedure Calls](https://github.com/donnemartin/system-design-primer#remote-procedure-call-rpc).
254 |
255 | ## Step 4: Scale the design
256 |
257 | > Identify and address bottlenecks, given the constraints.
258 |
259 | 
260 |
261 | **Important: Do not simply jump right into the final design from the initial design!**
262 |
263 | State you would 1) **Benchmark/Load Test**, 2) **Profile** for bottlenecks, 3) address bottlenecks while evaluating alternatives and trade-offs, and 4) repeat.  See [Design a system that scales to millions of users on AWS](https://github.com/donnemartin/system-design-primer/blob/master/solutions/system_design/scaling_aws/README.md) as a sample of how to iteratively scale the initial design.
264 |
265 | It's important to discuss what bottlenecks you might encounter with the initial design and how you might address each of them. For example, what issues are addressed by adding a **Load Balancer** with multiple **Web Servers**? **CDN**? **Master-Slave Replicas**? What are the alternatives and **Trade-Offs** for each?
266 |
267 | We'll introduce some components to complete the design and to address scalability issues. Internal load balancers are not shown to reduce clutter.
268 |
269 | *To avoid repeating discussions*, refer to the following [system design topics](https://github.com/donnemartin/system-design-primer#index-of-system-design-topics) for main talking points, tradeoffs, and alternatives:
270 |
271 | * [DNS](https://github.com/donnemartin/system-design-primer#domain-name-system)
272 | * [Load balancer](https://github.com/donnemartin/system-design-primer#load-balancer)
273 | * [Horizontal scaling](https://github.com/donnemartin/system-design-primer#horizontal-scaling)
274 | * [Web server (reverse proxy)](https://github.com/donnemartin/system-design-primer#reverse-proxy-web-server)
275 | * [API server (application layer)](https://github.com/donnemartin/system-design-primer#application-layer)
276 | * [Cache](https://github.com/donnemartin/system-design-primer#cache)
277 | * [NoSQL](https://github.com/donnemartin/system-design-primer#nosql)
278 | * [Consistency patterns](https://github.com/donnemartin/system-design-primer#consistency-patterns)
279 | * [Availability patterns](https://github.com/donnemartin/system-design-primer#availability-patterns)
280 |
281 | Some searches are very popular, while others are only executed once.  Popular queries can be served from a **Memory Cache** such as Redis or Memcached to reduce response times and to avoid overloading the **Reverse Index Service** and **Document Service**.  The **Memory Cache** is also useful for handling the unevenly distributed traffic and traffic spikes.  Reading 1 MB sequentially from memory takes about 250 microseconds, while reading from SSD takes 4x and from disk takes 80x longer.<sup><a href=https://github.com/donnemartin/system-design-primer#latency-numbers-every-programmer-should-know>1</a></sup>
282 |
283 | Below are a few other optimizations to the **Crawling Service**:
284 |
285 | * To handle the data size and request load, the **Reverse Index Service** and **Document Service** will likely need to make heavy use of sharding and replication
286 | * DNS lookup can be a bottleneck; the **Crawler Service** can keep its own DNS cache, refreshed periodically
287 | * The **Crawler Service** can improve performance and reduce connection overhead by reusing open connections, referred to as [connection pooling](https://en.wikipedia.org/wiki/Connection_pool) (see the sketch after this list)
288 |     * Switching to [UDP](https://github.com/donnemartin/system-design-primer#user-datagram-protocol-udp) could also boost performance
289 | * Web crawling is bandwidth intensive; ensure there is enough bandwidth to sustain high throughput
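
A minimal connection-pooling sketch using the `requests` library, whose `Session` object reuses open connections to the same host:

```
import requests

session = requests.Session()  # maintains a pool of open connections

def fetch_page(url):
    """Fetch a page over a pooled connection, raising on HTTP errors."""
    response = session.get(url, timeout=10)
    response.raise_for_status()
    return response.text
```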
290 |
291 | ## Additional talking points
292 |
293 | > Additional topics to dive into, depending on the problem scope and time remaining.
294 |
295 | ### SQL scaling patterns
296 |
297 | * [Read replicas](https://github.com/donnemartin/system-design-primer#master-slave)
298 | * [Federation](https://github.com/donnemartin/system-design-primer#federation)
299 | * [Sharding](https://github.com/donnemartin/system-design-primer#sharding)
300 | * [Denormalization](https://github.com/donnemartin/system-design-primer#denormalization)
301 | * [SQL Tuning](https://github.com/donnemartin/system-design-primer#sql-tuning)
302 |
303 | #### NoSQL
304 |
305 | * [Key-value store](https://github.com/donnemartin/system-design-primer#key-value-store)
306 | * [Document store](https://github.com/donnemartin/system-design-primer#document-store)
307 | * [Wide column store](https://github.com/donnemartin/system-design-primer#wide-column-store)
308 | * [Graph database](https://github.com/donnemartin/system-design-primer#graph-database)
309 | * [SQL vs NoSQL](https://github.com/donnemartin/system-design-primer#sql-or-nosql)
310 |
311 | ### Caching
312 |
313 | * Where to cache
314 | * [Client caching](https://github.com/donnemartin/system-design-primer#client-caching)
315 | * [CDN caching](https://github.com/donnemartin/system-design-primer#cdn-caching)
316 | * [Web server caching](https://github.com/donnemartin/system-design-primer#web-server-caching)
317 | * [Database caching](https://github.com/donnemartin/system-design-primer#database-caching)
318 | * [Application caching](https://github.com/donnemartin/system-design-primer#application-caching)
319 | * What to cache
320 | * [Caching at the database query level](https://github.com/donnemartin/system-design-primer#caching-at-the-database-query-level)
321 | * [Caching at the object level](https://github.com/donnemartin/system-design-primer#caching-at-the-object-level)
322 | * When to update the cache
323 | * [Cache-aside](https://github.com/donnemartin/system-design-primer#cache-aside)
324 | * [Write-through](https://github.com/donnemartin/system-design-primer#write-through)
325 | * [Write-behind (write-back)](https://github.com/donnemartin/system-design-primer#write-behind-write-back)
326 | * [Refresh ahead](https://github.com/donnemartin/system-design-primer#refresh-ahead)
327 |
328 | ### Asynchronism and microservices
329 |
330 | * [Message queues](https://github.com/donnemartin/system-design-primer#message-queues)
331 | * [Task queues](https://github.com/donnemartin/system-design-primer#task-queues)
332 | * [Back pressure](https://github.com/donnemartin/system-design-primer#back-pressure)
333 | * [Microservices](https://github.com/donnemartin/system-design-primer#microservices)
334 |
335 | ### Communications
336 |
337 | * Discuss tradeoffs:
338 | * External communication with clients - [HTTP APIs following REST](https://github.com/donnemartin/system-design-primer#representational-state-transfer-rest)
339 | * Internal communications - [RPC](https://github.com/donnemartin/system-design-primer#remote-procedure-call-rpc)
340 | * [Service discovery](https://github.com/donnemartin/system-design-primer#service-discovery)
341 |
342 | ### Security
343 |
344 | Refer to the [security section](https://github.com/donnemartin/system-design-primer#security).
345 |
346 | ### Latency numbers
347 |
348 | See [Latency numbers every programmer should know](https://github.com/donnemartin/system-design-primer#latency-numbers-every-programmer-should-know).
349 |
350 | ### Ongoing
351 |
352 | * Continue benchmarking and monitoring your system to address bottlenecks as they come up
353 | * Scaling is an iterative process
354 |
--------------------------------------------------------------------------------
/solutions/system_design/web_crawler/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mutablealligator/system-design-primer/0889f04f0d8162b6513437d3171a8ebb9ef5e504/solutions/system_design/web_crawler/__init__.py
--------------------------------------------------------------------------------
/solutions/system_design/web_crawler/web_crawler.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mutablealligator/system-design-primer/0889f04f0d8162b6513437d3171a8ebb9ef5e504/solutions/system_design/web_crawler/web_crawler.png
--------------------------------------------------------------------------------
/solutions/system_design/web_crawler/web_crawler_basic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mutablealligator/system-design-primer/0889f04f0d8162b6513437d3171a8ebb9ef5e504/solutions/system_design/web_crawler/web_crawler_basic.png
--------------------------------------------------------------------------------
/solutions/system_design/web_crawler/web_crawler_mapreduce.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from mrjob.job import MRJob
4 |
5 |
6 | class RemoveDuplicateUrls(MRJob):
7 |
8 | def mapper(self, _, line):
9 | yield line, 1
10 |
11 | def reducer(self, key, values):
12 | total = sum(values)
13 | if total == 1:
14 | yield key, total
15 |
16 | def steps(self):
17 | """Run the map and reduce steps."""
18 | return [
19 | self.mr(mapper=self.mapper,
20 | reducer=self.reducer)
21 | ]
22 |
23 |
24 | if __name__ == '__main__':
25 | RemoveDuplicateUrls.run()
26 |
--------------------------------------------------------------------------------
/solutions/system_design/web_crawler/web_crawler_snippets.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | class PagesDataStore(object):
4 |
5 |     def __init__(self, db):
6 | self.db = db
7 | ...
8 |
9 | def add_link_to_crawl(self, url):
10 | """Add the given link to `links_to_crawl`."""
11 | ...
12 |
13 | def remove_link_to_crawl(self, url):
14 | """Remove the given link from `links_to_crawl`."""
15 | ...
16 |
17 |     def reduce_priority_link_to_crawl(self, url):
18 | """Reduce the priority of a link in `links_to_crawl` to avoid cycles."""
19 | ...
20 |
21 | def extract_max_priority_page(self):
22 | """Return the highest priority link in `links_to_crawl`."""
23 | ...
24 |
25 | def insert_crawled_link(self, url, signature):
26 | """Add the given link to `crawled_links`."""
27 | ...
28 |
29 | def crawled_similar(self, signature):
30 | """Determine if we've already crawled a page matching the given signature"""
31 | ...
32 |
33 |
34 | class Page(object):
35 |
36 | def __init__(self, url, contents, child_urls):
37 | self.url = url
38 | self.contents = contents
39 | self.child_urls = child_urls
40 | self.signature = self.create_signature()
41 |
42 | def create_signature(self):
43 | # Create signature based on url and contents
44 | ...
45 |
46 |
47 | class Crawler(object):
48 |
49 | def __init__(self, pages, data_store, reverse_index_queue, doc_index_queue):
50 | self.pages = pages
51 | self.data_store = data_store
52 | self.reverse_index_queue = reverse_index_queue
53 | self.doc_index_queue = doc_index_queue
54 |
55 | def crawl_page(self, page):
56 | for url in page.child_urls:
57 | self.data_store.add_link_to_crawl(url)
58 | self.reverse_index_queue.generate(page)
59 | self.doc_index_queue.generate(page)
60 | self.data_store.remove_link_to_crawl(page.url)
61 | self.data_store.insert_crawled_link(page.url, page.signature)
62 |
63 | def crawl(self):
64 | while True:
65 | page = self.data_store.extract_max_priority_page()
66 | if page is None:
67 | break
68 | if self.data_store.crawled_similar(page.signature):
69 | self.data_store.reduce_priority_link_to_crawl(page.url)
70 | else:
71 | self.crawl_page(page)
73 |
--------------------------------------------------------------------------------