├── app
    ├── routers
    │   ├── __init__.py
    │   ├── .DS_Store
    │   └── validations.py
    ├── database
    │   ├── __init__.py
    │   ├── .DS_Store
    │   ├── database.py
    │   └── models.py
    ├── services
    │   ├── __init__.py
    │   ├── .DS_Store
    │   ├── generate_graph.py
    │   └── validations.py
    ├── .DS_Store
    ├── data.db
    ├── utils.py
    └── main.py
├── .python-version
├── .gitignore
├── docker-compose.yml
├── resources
    └── project-preview.png
├── requirements.txt
├── .vscode
    └── settings.json
├── Dockerfile
├── README.md
└── challenges
    ├── widgets.md
    └── stack-trace.md


/app/routers/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.python-version:
--------------------------------------------------------------------------------
1 | l7-assignment


--------------------------------------------------------------------------------
/app/database/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/app/services/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | uploads/
3 | static/
4 | .DS_Store


--------------------------------------------------------------------------------
/app/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Big-Silver/benfords-law-web-app/main/app/.DS_Store


--------------------------------------------------------------------------------
/app/data.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Big-Silver/benfords-law-web-app/main/app/data.db


--------------------------------------------------------------------------------
/app/database/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Big-Silver/benfords-law-web-app/main/app/database/.DS_Store


--------------------------------------------------------------------------------
/app/routers/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Big-Silver/benfords-law-web-app/main/app/routers/.DS_Store


--------------------------------------------------------------------------------
/app/services/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Big-Silver/benfords-law-web-app/main/app/services/.DS_Store


--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
version: "3"
services:
  web:
    build: .
    ports:
      # host:container. The image's CMD starts uvicorn on port 8000, so the
      # container side of the mapping must be 8000 (it was 80, where nothing
      # listens).
      - "8000:8000"
7 | 


--------------------------------------------------------------------------------
/resources/project-preview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Big-Silver/benfords-law-web-app/main/resources/project-preview.png


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | fastapi==0.103.1
2 | pydantic==2.3.0
3 | pandas==2.0.3
4 | matplotlib==3.7.2
5 | pytest==7.4.2
6 | SQLAlchemy==2.0.20
7 | uvicorn==0.23.2
8 | python-multipart==0.0.6


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "cSpell.words": [
 3 |     "Benford",
 4 |     "benfords",
 5 |     "benfrods",
 6 |     "fastapi",
 7 |     "sqlalchemy",
 8 |     "xlabel",
 9 |     "ylabel"
10 |   ]
11 | }


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Use the official Python image as the base image
 2 | FROM python:3.8-slim
 3 | 
 4 | # Create and set the working directory
 5 | WORKDIR /code
 6 | 
 7 | # Copy the current directory contents into the container at /app
 8 | COPY /app /code/
 9 | COPY ./requirements.txt /code/requirements.txt
10 | 
11 | # Install any needed packages specified in requirements.txt
12 | RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
13 | 
14 | # Make port 80 available to the world outside this container
15 | EXPOSE 8000
16 | 
17 | CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]


--------------------------------------------------------------------------------
/app/utils.py:
--------------------------------------------------------------------------------
 1 | from fastapi import FastAPI
 2 | from fastapi.middleware.cors import CORSMiddleware
 3 | import random
 4 | import string
 5 | 
 6 | app = FastAPI()
 7 | 
 8 | # Enable CORS
 9 | origins = ["*"]
10 | app.add_middleware(CORSMiddleware, allow_origins=origins)
11 | 
12 | 
13 | # Check if a file has a valid extension
def allowed_file(filename: str):
    """Return True when *filename* ends in an accepted extension.

    Accepted extensions are ``txt`` and ``csv``, compared case-insensitively
    against the text after the last dot.

    Args:
        filename: Client-supplied file name. ``None`` or an empty string is
            tolerated and treated as "not allowed".

    Returns:
        bool: True if the name contains a dot and its final extension is
        allowed, otherwise False.
    """
    allowed_extensions = {'txt', 'csv'}
    if not filename:
        # Guard first: the membership test below would raise TypeError on None.
        return False
    _, dot, extension = filename.rpartition('.')
    return bool(dot) and extension.lower() in allowed_extensions
17 | 
18 | # Function to generate a random filename
def generate_random_filename(length: int = 10):
    """Return a random name made of ASCII letters (no extension).

    Args:
        length: Number of characters to generate. Defaults to 10, the size
            this function always produced before the parameter existed.

    Returns:
        str: ``length`` characters drawn from ``string.ascii_letters``.
    """
    return ''.join(random.choices(string.ascii_letters, k=length))
22 | 


--------------------------------------------------------------------------------
/app/database/database.py:
--------------------------------------------------------------------------------
 1 | from sqlalchemy import create_engine
 2 | from sqlalchemy.orm import sessionmaker
 3 | from sqlalchemy.ext.declarative import declarative_base
 4 | 
 5 | Base = declarative_base()
 6 | 
 7 | SQLALCHEMY_DATABASE_URL = "sqlite:///data.db"
 8 | 
 9 | engine = create_engine(
10 |     SQLALCHEMY_DATABASE_URL,
11 |     pool_pre_ping=True,
12 | )
13 | 
14 | SessionLocal = sessionmaker(
15 |     bind=engine,
16 |     autocommit=False,
17 |     autoflush=False,
18 | )
19 | 
20 | Base = declarative_base()
21 | # Create the tables if they don't exist
22 | Base.metadata.create_all(bind=engine)
23 | 
def get_db():
    """Yield a session from ``SessionLocal`` and close it when the caller is done.

    Written as a generator so it can be used as a dependency: the code after
    ``yield`` runs once the consumer finishes, whether or not it raised.
    """
    # Leaving the ``with`` block closes the session, including on error.
    with SessionLocal() as session:
        yield session


--------------------------------------------------------------------------------
/app/services/generate_graph.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import matplotlib.pyplot as plt
 3 | 
 4 | def generate_bar_chart(data, analysis_id):
 5 |     # Generate a bar chart using Matplotlib
 6 |     try:
 7 |         labels = list(data.keys())
 8 |         values = list(data.values())
 9 | 
10 |         plt.figure(figsize=(8, 4))
11 |         plt.bar(labels, values)
12 |         plt.xlabel('Leading Digit')
13 |         plt.ylabel('Frequency')
14 |         plt.title('Benford\'s Law Validation')
15 |         
16 |         graph_image_path = os.path.join('static', f'{analysis_id}-benfords_plot.png')
17 |         plt.savefig(graph_image_path)
18 | 
19 |         return graph_image_path
20 |     except Exception as e:
21 |         print(e)
22 |         return f'generate_bar_chart failed: {str(e)}'
23 | 
24 | 


--------------------------------------------------------------------------------
/app/database/models.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import json
 3 | from sqlalchemy import Column, Integer, String, DateTime, Text
 4 | from datetime import datetime
 5 | from .database import Base
 6 | 
 7 | class DataAnalysisHistory(Base):
 8 |     __tablename__ = "data_analysis_history"
 9 | 
10 |     id = Column(Integer, primary_key=True, index=True)
11 |     filename = Column(String, index=True)
12 |     result = Column(String)
13 |     data = Column(Text)
14 |     created_at = Column(DateTime, default=datetime.utcnow)
15 | 
16 |     def to_dict(self):
17 |         return {
18 |             "id": self.id,
19 |             "filename": self.filename,
20 |             "result": self.result,
21 |             "data": json.loads(self.data),
22 |             "created_at": self.created_at.strftime("%Y-%m-%d %H:%M:%S")
23 |         }
24 | 
25 | 


--------------------------------------------------------------------------------
/app/main.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from fastapi import FastAPI
 3 | from fastapi.middleware.cors import CORSMiddleware
 4 | import database.models as models
 5 | from database.database import engine
 6 | from routers import validations
 7 | 
 8 | # Define the directory path
 9 | static_directory = 'static'
10 | upload_directory = 'upload'
11 | 
12 | # Create the directory if it doesn't exist
13 | if not os.path.exists(static_directory):
14 |     os.makedirs(static_directory)
15 | 
16 | if not os.path.exists(upload_directory):
17 |     os.makedirs(upload_directory)
18 | 
19 | # Enable CORS
20 | origins = ["*"]
21 | app = FastAPI()
22 | app.add_middleware(
23 |     CORSMiddleware,
24 |     allow_origins=origins,
25 |     allow_credentials=True,
26 |     allow_methods=["*"],
27 |     allow_headers=["*"],
28 | )
29 | 
30 | models.Base.metadata.create_all(engine)
31 | 
32 | app.include_router(validations.router)
33 | 
34 | if __name__ == "__main__":
35 |     import uvicorn
36 |     uvicorn.run(app, host="0.0.0.0", port=8000)


--------------------------------------------------------------------------------
/app/services/validations.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from fastapi import HTTPException
 3 | from sqlalchemy.orm import Session
 4 | from database.models import DataAnalysisHistory
 5 | import database.database as database
 6 | 
 7 | 
 8 | import pandas as pd
 9 | 
def apply_benfords_law(file_path, sep="\t", tolerance=0.02):
    """Check the first numeric column of a delimited file against Benford's law.

    The leading significant digit of every non-zero, non-missing value is
    tallied, and the share of values starting with ``1`` is compared with the
    expected 30.1%.

    Args:
        file_path: Path or file-like object handed to ``pandas.read_csv``.
        sep: Column separator. Defaults to a tab, the only separator this
            function used before the parameter existed.
        tolerance: Maximum absolute deviation of the observed share of
            leading ``1`` from 0.301 that still counts as conforming.

    Returns:
        tuple: ``(message, distribution)``. ``distribution`` is a pandas
        Series indexed by the strings ``'1'`` .. ``'9'`` holding each digit's
        proportion. It is ``None`` when the file has no numeric column or
        could not be processed, in which case ``message`` explains why.
    """
    digits = [str(digit) for digit in range(1, 10)]
    try:
        data = pd.read_csv(file_path, sep=sep)
        # 'number' matches every integer and float width, not only the
        # platform-default ones.
        numerical_columns = data.select_dtypes(include='number').columns

        if numerical_columns.size == 0:
            return 'No numerical columns found', None

        # Missing values carry no digit; the sign is not part of the digit.
        magnitudes = data[numerical_columns[0]].dropna().abs()

        # Drop leading zeros and the decimal point so that 0.057 -> '5'.
        # Zero itself collapses to an empty string and is filtered out with
        # anything else that does not start with 1-9 (e.g. 'inf').
        first_characters = magnitudes.astype(str).str.lstrip('0.').str[0]
        leading_digits = first_characters[first_characters.isin(digits)]

        # reindex guarantees all nine digits are present, in order, with 0.0
        # for digits that never occurred.
        leading_digit_counts = (
            leading_digits.value_counts(normalize=True)
            .reindex(digits, fill_value=0.0)
        )

        # Check if the proportion of '1' as the leading digit is about 30%
        observed_proportion_1 = leading_digit_counts['1']
        is_valid = abs(observed_proportion_1 - 0.301) < tolerance

        if is_valid:
            return 'Validation successful', leading_digit_counts
        return 'Validation failed: Data does not conform to Benford\'s Law', leading_digit_counts

    except Exception as e:
        return f'Validation failed: {str(e)}', None
39 | 
40 | 
def validate_benfrods_law_service(session: Session, filename, system_file_path):
    """Analyse a saved file and store the outcome as a history record.

    This function was previously defined twice with identical bodies; the
    single definition below replaces both.

    Args:
        session: Database session used to store the record.
        filename: File name recorded with the analysis.
        system_file_path: Path of the file to analyse.

    Returns:
        tuple: ``(data_analysis, data)``: the committed
        ``DataAnalysisHistory`` row and the distribution returned by
        ``apply_benfords_law``.

    Raises:
        HTTPException: 400 when the analysis produced no distribution (the
            detail carries the analysis message); 500 when the record could
            not be serialized or stored.
    """
    result, data = apply_benfords_law(system_file_path)
    if data is None:
        # No distribution means the file could not be analysed. Report that
        # message rather than failing later on ``None.to_dict()``.
        raise HTTPException(status_code=400, detail=result)

    try:
        serialized_data = json.dumps(data.to_dict())
        data_analysis = DataAnalysisHistory(filename=filename, result=result, data=serialized_data)

        session.add(data_analysis)
        session.commit()
    except Exception as e:
        session.rollback()
        raise HTTPException(status_code=500, detail=f"Validation failed: {str(e)}") from e

    return data_analysis, data
68 |     
def get_validation_analysis(session: Session, analysis_id):
    """Fetch one stored analysis record by id.

    Args:
        session: Database session to query.
        analysis_id: Primary key of the record.

    Returns:
        DataAnalysisHistory: The matching record.

    Raises:
        HTTPException: 404 when no record has that id; 500 when the query
            itself fails.
    """
    try:
        data_analysis = session.query(DataAnalysisHistory).filter_by(id=analysis_id).first()
    except Exception as e:
        session.rollback()
        raise HTTPException(status_code=500, detail=f"Validation failed: {str(e)}") from e

    # Raised outside the try block: inside it, the generic handler caught
    # this 404 and re-raised it as a 500.
    if not data_analysis:
        raise HTTPException(status_code=404, detail="Analysis not found")

    return data_analysis
81 |     
82 | 
async def get_data_analysis_history(session: Session):
    """Return every ``DataAnalysisHistory`` row as a list."""
    return session.query(DataAnalysisHistory).all()


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Benford's Law Web Application
  2 | 
  3 | Challenge: Benford's Law
  4 | 
  5 | In 1938, Frank Benford published a paper showing the distribution of the leading digit in many disparate sources of data. In all these sets of data, the number 1 was the leading digit about 30% of the time. Benford's law has been found to apply to population numbers, death rates, lengths of rivers, mathematical distributions given by some power law, and physical constants like atomic weights and specific heats.
  6 | 
  7 | Create a python-based web application that
  8 | 
  9 | 1. can parse the attached example file (census_2009) or another flat file with a viable target column. Note that other columns in user-submitted files may or may not be the same as the census data file and users are known for submitting files that don't always conform to rigid expectations. How you deal with files that don't conform to the expectations of the application is up to you, but should be reasonable and defensible.
 10 | 
 11 | 2. validates Benford's assertion
 12 | 
 13 | 3. Displays results to the user in a visually interesting manner
 14 | 
 15 | The solution should be delivered as a link to a git repository (e.g. hosted via github). The solution should be runnable by L7 by cloning the repository and following any provided instructions.
 16 | 
 17 | Stretch goals:
 18 | 
 19 | - The solution may be executable via Docker
 20 | 
 21 | - The solution allows users to save their data files and results and retrieve them at a later time so they can view data that was tested historically vs. their new data.
 22 | 
 23 | - Some level of automated testing (unit or integration)
 24 | 
 25 | ## Prerequisites
 26 | 
 27 | - Python 3.8 or higher
 28 | - Docker (optional)
 29 | 
 30 | ## Getting Started
 31 | 
 32 | This project uses FastAPI because of its built-in interactive documentation and fast development cycle. To try the project out, follow these steps:
 33 | 
 34 | 1. Clone the repository:
 35 | 
 36 |    ```bash
 37 |    git clone <repository_url>
 38 |    cd benfords-law-web-app
 39 |    ```
 40 | 
 41 | 2. Set up a virtual environment (recommended):
 42 | 
 43 |    ```bash
 44 |    python -m venv l7-assignment
 45 |    source l7-assignment/bin/activate # On Windows, use: l7-assignment\Scripts\activate
 46 |    ```
 47 | 
 48 |    or Using `pyenv`
 49 | 
 50 |    ```bash
 51 |    pyenv virtualenv 3.8.10 l7-assignment
 52 |    ```
 53 | 
 54 |    It will automatically be activated due to `.python-version` file.
 55 | 
 56 |    Note: `pyenv` needs to be installed before. Besides, I have used Python 3.8.10 for this project.
 57 | 
 58 | 3. Install dependencies:
 59 | 
 60 |    ```bash
 61 |    pip install -r requirements.txt
 62 |    ```
 63 | 
 64 | ## Running the Application
 65 | 
 66 | To run the application locally, use the following command:
 67 | 
 68 | ```bash
 69 | cd app
 70 | uvicorn main:app --host 0.0.0.0 --port 8000 --reload
 71 | ```
 72 | 
 73 | The application will be accessible at http://localhost:8000 in your web browser.
 74 | **Note: FastAPI provides interactive API documentation out of the box at http://localhost:8000/docs**
 75 | 
 76 | ## Application Walkthrough
 77 | 
 78 | - Visit the application in your web browser (http://localhost:8000/docs).
 79 | 
 80 |   Five APIs are available in this project. They return either JSON or a graph, both for validating a new file and for previewing previously analysed files.
 81 |   <img src="./resources/project-preview.png" alt="APIs preview">
 82 | 
 83 | - Upload new file, validate and see results in both json and graph based on the api that is triggered.
 84 | 
 85 |   - API POST:`/validate-dataset/json`will give json results for a new file
 86 |   - API POST:`/validate-dataset/visualize`will give graph analysis for a new file
 87 | 
 88 | - Preview an existing record, its validation status and its graph by giving its id
 89 |   - API GET:`/validate-dataset/{analysis_id}/json`will give json results for an existing analysis record
 90 |   - API GET:`/validate-dataset/{analysis_id}/visualize`will give graph analysis for an existing analysis record
 91 | 
 92 | ## Viewing Historical Data
 93 | 
 94 | - API GET:`/validate-dataset/history/` will give historical validation results
 95 | 
 96 | ## Running Automated Tests
 97 | 
 98 | To run the automated tests, use the following command:
 99 | 
100 | ```bash
101 | pytest tests/
102 | ```
103 | 
104 | ## Docker Support (Optional)
105 | 
106 | If you prefer to run the application in a Docker container, follow these steps:
107 | 
108 | 1. Build the Docker image:
109 | 
110 | ```bash
111 | docker build -t benfords-law-app .
112 | ```
113 | 
114 | 2. Run the Docker container:
115 | 
116 | ```bash
117 | docker run -p 8000:8000 benfords-law-app
118 | ```
119 | 
120 | The application will be accessible at http://localhost:8000 in your web browser.
121 | 
122 | ## Cleanup
123 | 
124 | To stop and remove the Docker container and image (if used), use the following commands:
125 | 
126 | ```bash
127 | docker stop <container_id>
128 | docker rm <container_id>
129 | docker rmi benfords-law-app
130 | ```
131 | 


--------------------------------------------------------------------------------
/app/routers/validations.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import os
  3 | from fastapi import (
  4 |     APIRouter,
  5 |     Depends,
  6 |     File,
  7 |     HTTPException,
  8 |     UploadFile,
  9 | )
 10 | from sqlalchemy.orm import Session
 11 | from database.models import DataAnalysisHistory
 12 | from fastapi.responses import JSONResponse, FileResponse
 13 | from utils import allowed_file, generate_random_filename
 14 | import database.database as database
 15 | from services import validations as validations_service
 16 | from services import generate_graph
 17 | 
 18 | router = APIRouter()
 19 | db = database.get_db
 20 | router = APIRouter(tags=["Validations"])
 21 | 
 22 | def validate_and_save_csv(file):
 23 |     if not file:
 24 |         raise HTTPException(status_code=400, detail="No file uploaded")
 25 | 
 26 |     if not allowed_file(file.filename):
 27 |         raise HTTPException(status_code=400, detail="Invalid file format. Please upload a valid text or CSV file.")
 28 | 
 29 |     # Generate a random filename to avoid conflicts
 30 |     filename = generate_random_filename() + ".csv"
 31 |     system_file_path = os.path.join("uploads", filename)
 32 | 
 33 |     try:
 34 |         with open(system_file_path, "wb") as f:
 35 |             f.write(file.file.read())
 36 |     
 37 |     except Exception as e:
 38 |         print(e)
 39 |         return JSONResponse(content="Unable to read file", status_code=500)
 40 | 
 41 |     return filename, system_file_path
 42 | 
 43 | 
 44 | @router.post("/validate-dataset/json")
 45 | async def validate_benfrods_law(file: UploadFile = File(
 46 |         ..., description="Upload a file numerical data column"
 47 |     ), db: Session = Depends(db)):
 48 | 
 49 |     try:
 50 |         filename, system_file_path = validate_and_save_csv(file)
 51 |     except Exception as e:
 52 |         return JSONResponse(content=e, status_code=500)
 53 |     
 54 |     try:
 55 |         data_analysis, _ = validations_service.validate_benfrods_law_service(db, filename, system_file_path)
 56 | 
 57 |         return JSONResponse(content={
 58 |                 "message": data_analysis.result,
 59 |                 "data": json.dumps(data_analysis.to_dict()),
 60 |             }, status_code=201)
 61 | 
 62 |     
 63 |     except Exception as e:
 64 |         print("backend/app/routers/validations.py:64", e)
 65 |         return JSONResponse(content="Unable to validate file", status_code=500)
 66 |     
 67 | 
 68 | @router.post("/validate-dataset/visualize", response_class=FileResponse)
 69 | async def validate_benfrods_law(
 70 |     file: UploadFile = File(
 71 |         ..., description="Upload a file numerical data column"
 72 |     ), 
 73 |     db: Session = Depends(db),
 74 | ):
 75 |     try:
 76 |         filename, system_file_path = validate_and_save_csv(file)
 77 |     except Exception as e:
 78 |         return JSONResponse(content=e, status_code=500)
 79 |     
 80 |     try:
 81 |         data_analysis, data  = validations_service.validate_benfrods_law_service(db, filename, system_file_path)
 82 |         image_path = generate_graph.generate_bar_chart(data.to_dict(), data_analysis.id)
 83 |         image =  FileResponse(image_path)
 84 | 
 85 |         return image
 86 |     
 87 |     except Exception as e:
 88 |         print("backend/app/routers/validations.py:85", e)
 89 |         return JSONResponse(content="Unable to validate file", status_code=500)
 90 | 
 91 | @router.get("/validate-dataset/{analysis_id}/json")
 92 | async def visualize_validation_analysis(
 93 |     analysis_id: int, 
 94 |     session: Session = Depends(db)
 95 | ):
 96 |     try:
 97 |         data_analysis = validations_service.get_validation_analysis(session, analysis_id)
 98 | 
 99 |         # Generate the graph
100 | 
101 |         return JSONResponse(content={
102 |                 "message": data_analysis.result,
103 |                 "data": json.dumps(data_analysis.to_dict()),
104 |             }, status_code=201)
105 |     
106 |     except Exception as e:
107 |         print("backend/app/routers/validations.py:10", e)
108 |         return JSONResponse(content="Issues with ", status_code=500)
109 | 
110 | 
@router.get("/validate-dataset/{analysis_id}/visualize", response_class=FileResponse)
async def visualize_validation_analysis(
    analysis_id: int, 
    session: Session = Depends(db)
):
    """Return the distribution chart of a stored analysis as an image.

    HTTP errors from the lookup, such as 404 for an unknown id, are passed
    through; anything unexpected becomes a generic 500.
    """
    try:
        data_analysis = validations_service.get_validation_analysis(session, analysis_id)

        # Generate the graph
        image_path = generate_graph.generate_bar_chart(json.loads(data_analysis.data), analysis_id)

        # Fail here, inside the handler, if no image was produced, rather
        # than later while the response is being streamed.
        if not os.path.isfile(image_path):
            raise RuntimeError(f"chart image not found: {image_path}")

        return FileResponse(image_path)
    except HTTPException:
        # Keep the lookup's own status code instead of flattening it to 500.
        raise
    except Exception as e:
        print("backend/app/routers/validations.py:10", e)
        return JSONResponse(content="Unable to generate graph", status_code=500)
127 | 
@router.get("/validate-dataset/history/")
async def history(db: Session = Depends(db)):
    """Return every stored analysis record."""
    records = db.query(DataAnalysisHistory)
    return records.all()


--------------------------------------------------------------------------------
/challenges/widgets.md:
--------------------------------------------------------------------------------
  1 | # Challenge: Widgets
  2 | 
  3 | Create a basic system description and document a normalized schema from the attached widgets text file. Include
  4 | 
  5 | 1. what you think this system does
  6 | 2. what you feel would be a reasonable database structure for the data and a reasonable architecture for the system
  7 | 3. any questions or concerns you have regarding this dataset/system that might need to be answered before establishing an ideal database/solution for such a system.
  8 | 
  9 | It's a very open-ended problem, and that's part of the problem.
 10 | 
 11 | ## System Description:
 12 | 
 13 | The system appears to manage information related to various types of traps, specifically animal traps, and their associated details. This includes information about the type of trap, its packaging, the customer or retailer it is sold to, its price, the supplier or manufacturer, the cost, the warehouse where it is stored, the quantity available, and a minimum quantity requirement.
 14 | 
 15 | ## Proposed Database Structure:
 16 | 
 17 | To create a normalized schema for this system, we can design a relational database with the following tables:
 18 | 
 19 | 1. Widgets (or Traps) Table:
 20 | 
 21 |    - Fields:
 22 |      - widget_id (Primary Key),
 23 |      - widget_name
 24 | 
 25 | 2. Packaging Table:
 26 | 
 27 |    - Fields:
 28 |      - packaging_id (Primary Key),
 29 |      - packaging_type
 30 | 
 31 | 3. Customers Table:
 32 | 
 33 |    - Fields:
 34 |      - customer_id (Primary Key),
 35 |      - customer_name
 36 | 
 37 | 4. Suppliers Table:
 38 | 
 39 |    - Fields:
 40 |      - supplier_id (Primary Key),
 41 |      - supplier_name
 42 | 
 43 | 5. Warehouses Table:
 44 | 
 45 |    - Fields:
 46 |      - warehouse_id (Primary Key),
 47 |      - warehouse_location
 48 | 
 49 | 6. Widgets_Info Table:
 50 |    - Fields:
 51 |      - widget_info_id (Primary Key),
 52 |      - widget_id (Foreign Key),
 53 |      - packaging_id (Foreign Key),
 54 |      - customer_id (Foreign Key),
 55 |      - price,
 56 |      - supplier_id (Foreign Key),
 57 |      - cost, warehouse_id (Foreign Key),
 58 |      - qty,
 59 |      - min_qty
 60 | 
 61 | ### Note:
 62 | 
 63 | Foreign keys link the Widgets_Info table to other tables, creating relationships. This allows us to retrieve specific information about widgets, packaging, customers, suppliers, and warehouses associated with each widget.
 64 | 
 65 | ## System Architecture:
 66 | 
 67 | The system can be built using a multi-tier architecture:
 68 | 
 69 | ### 1. Presentation Layer:
 70 | 
 71 | - This layer includes a user interface where users can interact with the system. It could be a web application, a mobile app, or a desktop application.
 72 | 
 73 | ### 2. Application Layer:
 74 | 
 75 | - This layer contains the business logic of the system. It processes user requests, communicates with the database, and performs necessary operations.
 76 | - Programming languages like Python, Java, or .NET can be used to develop the application layer.
 77 | 
 78 | ### 3. Database Layer:
 79 | 
 80 | The database layer stores and manages the data. A relational database management system (RDBMS) like MySQL, PostgreSQL, or Microsoft SQL Server can be used.
 81 | The normalized schema described above will be implemented in the database layer.
 82 | 
 83 | ## Questions/Concerns:
 84 | 
 85 | ### 1. Data Integrity:
 86 | 
 87 | Ensure data integrity by enforcing constraints and validations, such as checking that the cost is less than the price and that the quantity is not less than the minimum quantity.
 88 | 
 89 | ### 2. Updates and Inserts:
 90 | 
 91 | Consider how frequently new trap records are added and existing records are updated. Optimize the database for efficient INSERT and UPDATE operations.
 92 | 
 93 | ### 3. Reporting:
 94 | 
 95 | Determine if the system needs reporting capabilities, such as sales reports, inventory reports, or supplier performance reports. This will influence the database design.
 96 | 
 97 | ### 4. Security:
 98 | 
 99 | Implement proper security measures to protect sensitive data, especially pricing and supplier information.
100 | 
101 | ### 5. Scalability:
102 | 
103 | Plan for scalability in case the number of trap types, customers, or suppliers grows significantly over time.
104 | 
105 | ### 6. User Access Control:
106 | 
107 | Define roles and permissions for users accessing the system to ensure that only authorized individuals can perform certain actions.
108 | 
109 | ### 7. Backup and Recovery:
110 | 
111 | Implement a robust backup and recovery strategy to prevent data loss in case of system failures.
112 | 
113 | ### 8. Performance:
114 | 
115 | Consider potential performance bottlenecks and implement indexing and caching strategies to optimize database queries.
116 | 
117 | ### 9. API Integration:
118 | 
119 | If necessary, plan for integration with external systems or APIs for tasks like order processing or supplier communication.
120 | 
121 | ### 10. User Training:
122 | 
123 | Train users on how to use the system effectively to minimize errors and improve productivity.
124 | 
125 | By addressing these questions and concerns, you can design an ideal database and system solution for managing the widgets or traps effectively.
126 | 


--------------------------------------------------------------------------------
/challenges/stack-trace.md:
--------------------------------------------------------------------------------
  1 | # Challenge: Python Stack Trace Interpretation
  2 | 
  3 | See the "Python Stack Traces" attachment which lists several python stack traces. Your task is to examine the stack traces and provide a brief response for each one that summarizes what the problem or likely problem is, and the first line of code you would jump to in your code editor given the trace.
  4 | 
  5 | ## Problem number 1:
  6 | 
  7 | ### Input:
  8 | 
  9 | ```python
 10 | 
 11 | Traceback (most recent call last):
 12 |   File "stack_traces.py", line 36, in run_trace
 13 |     f()
 14 |   File "stack_traces.py", line 45, in <lambda>
 15 |     run_trace(1, lambda: perform_calculation(add, '1', 3))
 16 |   File "stack_traces.py", line 8, in perform_calculation
 17 |     calc(x, y)
 18 |   File "stack_traces.py", line 12, in add
 19 |     return x + y
 20 | TypeError: can only concatenate str (not "int") to str
 21 | ```
 22 | 
 23 | ### Root cause:
 24 | 
 25 | The problem is a TypeError due to attempting to concatenate a string and an integer.
 26 | 
 27 | ### First Line to check:
 28 | 
 29 | Go to line 12 in the add function, where x + y is evaluated.
 30 | 
 31 | ### Possible solution:
 32 | 
 33 | Convert the string to an integer before addition. We can use int() to do this: int(x) + int(y) if the numbers are integers. If the numbers are floating-point, we can use float() instead: float(x) + float(y).
 34 | 
 35 | ## Problem number 2:
 36 | 
 37 | ### Input:
 38 | 
 39 | ```python
 40 | Traceback (most recent call last):
 41 | File "stack_traces.py", line 36, in run_trace
 42 | f()
 43 | File "stack_traces.py", line 46, in <lambda>
 44 | run_trace(2, lambda: perform_calculation(add, 7, '3'))
 45 | File "stack_traces.py", line 8, in perform_calculation
 46 | calc(x, y)
 47 | File "stack_traces.py", line 12, in add
 48 | return x + y
 49 | TypeError: unsupported operand type(s) for +: 'int' and 'str'
 50 | ```
 51 | 
 52 | ### Root cause:
 53 | 
 54 | The problem is a TypeError due to attempting to add an integer and a string.
 55 | 
 56 | ### First Line to check:
 57 | 
 58 | Navigate to line 12 in the add function, where x + y is evaluated.
 59 | 
 60 | ### Possible solution:
 61 | 
 62 | Convert the string to an integer or a float before addition, e.g. int(x) + int(y) or float(x) + float(y).
 63 | 
 64 | ## Problem number 3:
 65 | 
 66 | ### Input:
 67 | 
 68 | ```python
 69 | Traceback (most recent call last):
 70 | File "stack_traces.py", line 36, in run_trace
 71 | f()
 72 | File "stack_traces.py", line 47, in <lambda>
 73 | run_trace(3, lambda: perform_calculation(mult, '3', '3'))
 74 | File "stack_traces.py", line 8, in perform_calculation
 75 | calc(x, y)
 76 | File "stack_traces.py", line 15, in mult
 77 | return x * y
 78 | TypeError: can't multiply sequence by non-int of type 'str'
 79 | ```
 80 | 
 81 | ### Root cause:
 82 | 
 83 | The problem is a TypeError caused by trying to multiply two strings, which is not supported.
 84 | 
 85 | ### First Line to check:
 86 | 
 87 | Look at line 15 in the mult function, where x \* y is calculated.
 88 | 
 89 | ### Possible solution:
 90 | 
 91 | To fix this, convert the strings to integers/floats before multiplication: int(x) \* int(y).
 92 | 
 93 | ## Problem number 4:
 94 | 
 95 | ### Input:
 96 | 
 97 | ```python
 98 | Traceback (most recent call last):
 99 | File "stack_traces.py", line 36, in run_trace
100 | f()
101 | File "stack_traces.py", line 48, in <lambda>
102 | run_trace(4, lambda: perform_calculation(mult, [4], [3]))
103 | File "stack_traces.py", line 8, in perform_calculation
104 | calc(x, y)
105 | File "stack_traces.py", line 15, in mult
106 | return x * y
107 | TypeError: can't multiply sequence by non-int of type 'list'
108 | ```
109 | 
110 | ### Root cause:
111 | 
112 | The error is a TypeError caused by attempting to multiply one list by another list; a sequence can only be multiplied by an integer.
113 | 
114 | ### First Line to check:
115 | 
116 | Investigate line 15 in the mult function, where x \* y is computed.
117 | 
118 | ### Possible solution:
119 | 
120 | Ensure that you are multiplying elements within the lists, not the entire lists themselves.
121 | 
122 | ## Problem number 5:
123 | 
124 | ### Input:
125 | 
126 | ```python
127 | Traceback (most recent call last):
128 | File "stack_traces.py", line 36, in run_trace
129 | f()
130 | File "stack_traces.py", line 49, in <lambda>
131 | run_trace(5, lambda: perform_calculation(innoc, '1', 3))
132 | File "stack_traces.py", line 8, in perform_calculation
133 | calc(x, y)
134 | File "stack_traces.py", line 22, in innoc
135 | spelunk()
136 | File "stack_traces.py", line 21, in spelunk
137 | raise ValueError('Invalid')
138 | ValueError: Invalid
139 | ```
140 | 
141 | ### Root cause:
142 | 
143 | The problem is a ValueError with the message "Invalid", raised explicitly inside the spelunk function, which is called from innoc.
144 | 
145 | ### First Line to check:
146 | 
147 | Go to line 21 in the spelunk function, where the ValueError is raised.
148 | 
149 | ### Possible solution:
150 | 
151 | Inspect the spelunk function to identify the specific issue that led to the ValueError and handle it appropriately.
152 | 
153 | ## Problem number 6:
154 | 
155 | ### Input:
156 | 
157 | ```python
158 | Traceback (most recent call last):
159 | File "stack_traces.py", line 36, in run_trace
160 | f()
161 | File "stack_traces.py", line 50, in <lambda>
162 | run_trace(6, lambda: comp_calc([1, 2, 3], 1, add))
163 | File "stack_traces.py", line 30, in comp_calc
164 | return [perform_calculation(calc, x_i, y_i) for x_i, y_i in zip(x, y)]
165 | TypeError: zip argument #2 must support iteration
166 | ```
167 | 
168 | ### Root cause:
169 | 
170 | This TypeError arises because zip is given an argument that doesn't support iteration: comp_calc is called with the integer 1 as its second argument.
171 | 
172 | ### First Line to check:
173 | 
174 | Check line 30 in the comp_calc function, where the list comprehension uses zip.
175 | 
176 | ### Possible solution:
177 | 
178 | Ensure that both x and y in the comp_calc function are iterable (e.g., lists or tuples) before using zip.
179 | 
180 | ## Problem number 7:
181 | 
182 | ### Input:
183 | 
184 | ```python
185 | Traceback (most recent call last):
186 | File "stack_traces.py", line 36, in run_trace
187 | f()
188 | File "stack_traces.py", line 51, in <lambda>
189 | run_trace(7, lambda: comp_calc([1, 2, [3]], [4, 5, 6], add))
190 | File "stack_traces.py", line 30, in comp_calc
191 | return [perform_calculation(calc, x_i, y_i) for x_i, y_i in zip(x, y)]
192 | File "stack_traces.py", line 30, in <listcomp>
193 | return [perform_calculation(calc, x_i, y_i) for x_i, y_i in zip(x, y)]
194 | File "stack_traces.py", line 8, in perform_calculation
195 | calc(x, y)
196 | File "stack_traces.py", line 12, in add
197 | return x + y
198 | TypeError: can only concatenate list (not "int") to list
199 | ```
200 | 
201 | ### Root cause:
202 | 
203 | A TypeError occurs because the code is trying to concatenate a list and an integer.
204 | 
205 | ### First Line to check:
206 | 
207 | Go to line 12 in the add function, where x + y is calculated.
208 | 
209 | ### Possible solution:
210 | 
211 | Modify the code to perform a valid operation on the list elements, depending on your intention.
212 | 
213 | ## Problem number 8:
214 | 
215 | ### Input:
216 | 
217 | ```python
218 | Traceback (most recent call last):
219 | File "stack_traces.py", line 36, in run_trace
220 | f()
221 | File "stack_traces.py", line 52, in <lambda>
222 | run_trace(8, lambda: calc_dict({'one': 1, 'two': '2'}, 'one', 'two', add))
223 | File "stack_traces.py", line 26, in calc_dict
224 | return perform_calculation(calc, d[k1], d[k2])
225 | File "stack_traces.py", line 8, in perform_calculation
226 | calc(x, y)
227 | File "stack_traces.py", line 12, in add
228 | return x + y
229 | TypeError: unsupported operand type(s) for +: 'int' and 'str'
230 | ```
231 | 
232 | ### Root cause:
233 | 
234 | A TypeError is raised when trying to add an integer and a string while processing a dictionary.
235 | 
236 | ### First Line to check:
237 | 
238 | Examine line 12 in the add function, where x + y is executed.
239 | 
240 | ### Possible solution:
241 | 
242 | Ensure that the values retrieved from the dictionary are integers or floats. If they are not, convert them to integers or floats first, as required.
243 | 
244 | ## Problem number 9:
245 | 
246 | ### Input:
247 | 
248 | ```python
249 | Traceback (most recent call last):
250 | File "stack_traces.py", line 36, in run_trace
251 | f()
252 | File "stack_traces.py", line 53, in <lambda>
253 | run_trace(9, lambda: calc_dict({}, 'one', 'two', add))
254 | File "stack_traces.py", line 26, in calc_dict
255 | return perform_calculation(calc, d[k1], d[k2])
256 | KeyError: 'one'
257 | ```
258 | 
259 | ### Root cause:
260 | 
261 | The problem is a KeyError occurring because the code tries to access dictionary keys that don't exist.
262 | 
263 | ### First Line to check:
264 | 
265 | Go to line 26 in the calc_dict function, where d[k1] and d[k2] are accessed.
266 | 
267 | ### Possible solution:
268 | 
269 | Check if the keys 'one' and 'two' exist in the dictionary before accessing them, and handle the case when they are not present.
270 | 


--------------------------------------------------------------------------------