├── __init__.py
├── free_space.py
├── shtuff
    ├── __init__.py
    ├── storage_handling.py
    ├── WIP_bias_adjustor.py
    ├── make_prediction.py
    ├── riven_funcs.py
    ├── WIP_liquidity_gradient.py
    └── data_handler.py
├── riven_tool
    ├── __init__.py
    └── rivens_analysis.py
├── training
    ├── __init__.py
    ├── trainers
    │   ├── __init__.py
    │   ├── train_liquidity_model.py
    │   └── train_price_model.py
    └── preprocessors
    │   ├── __init__.py
    │   ├── price_model_preprocessor.py
    │   └── liquidity_model_preprocessor.py
├── tool_setup_and_maintenance
    ├── __init__.py
    ├── auto_setup.py
    ├── download_data.py
    ├── setup_weapon_information.py
    └── create_marketplace_dataframe.py
├── .gitignore
├── data_files
    └── attribute_name_shortcuts.json
├── README.md
└── filepaths.py


/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/free_space.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/shtuff/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/riven_tool/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/training/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/training/trainers/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tool_setup_and_maintenance/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/training/preprocessors/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/shtuff/storage_handling.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | 
 3 | 
 4 | def read_json(file_path: str):
 5 |     try:
 6 |         with open(file_path, "r", encoding="utf-8") as file:
 7 |             data = json.load(file)
 8 |         return data
 9 |     except UnicodeDecodeError as e:
10 |         print(f"An error occurred while reading the JSON file: {e}")
11 |         return None
12 | 
13 | 
14 | def save_json(file_path: str, data) -> None:
15 |     with open(file_path, "w", encoding="utf-8") as file:
16 |         json.dump(data, file, indent=4)
17 | 
18 | 
19 | 
20 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Ignore all __pycache__ directories and compiled Python files within the broader scope and project directory
 2 | __pycache__/
 3 | *.pyc
 4 | *.pyo
 5 | 
 6 | # Ignore specific directories within warframe_marketplace_predictor
 7 | training/model_data/
 8 | old_stuff/
 9 | misc/
10 | 
11 | # Ignore specific files within data_files directory in warframe_marketplace_predictor
12 | data_files/attributes_data.json
13 | data_files/developer_summary_stats.json
14 | data_files/global_price_freq.json
15 | data_files/ig_weapon_stats.json
16 | data_files/items_data.json
17 | data_files/public_spreadsheet.csv
18 | data_files/raw_marketplace_data.json
19 | data_files/weapon_ranking_information.json
20 | 
21 | # Ignore hidden directories and configuration files at the root level (outside warframe_marketplace_predictor)
22 | .anaconda/
23 | .cache/
24 | .conda/
25 | .condarc
26 | .config/
27 | .continuum/
28 | .gitconfig
29 | .gnupg/
30 | .ipython/
31 | .jupyter/
32 | .keras/
33 | .matplotlib/
34 | .idea/


--------------------------------------------------------------------------------
/data_files/attribute_name_shortcuts.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "ammo": "ammo_maximum",
 3 |   "ammo_max": "ammo_maximum",
 4 |   "corp": "damage_vs_corpus",
 5 |   "cor": "damage_vs_corpus",
 6 |   "corpus": "damage_vs_corpus",
 7 |   "grin": "damage_vs_grineer",
 8 |   "grineer": "damage_vs_grineer",
 9 |   "inf": "damage_vs_infested",
10 |   "infested": "damage_vs_infested",
11 |   "cold": "cold_damage",
12 |   "init": "channeling_damage",
13 |   "initial": "channeling_damage",
14 |   "initial_combo": "channeling_damage",
15 |   "heavy": "channeling_efficiency",
16 |   "heavy_attack": "channeling_efficiency",
17 |   "heavy_attack_efficiency": "channeling_efficiency",
18 |   "heavy_efficiency": "channeling_efficiency",
19 |   "combo_duration": "combo_duration",
20 |   "combo_dur": "combo_duration",
21 |   "cc": "critical_chance",
22 |   "ccs": "critical_chance_on_slide_attack",
23 |   "cd": "critical_damage",
24 |   "damage": "base_damage_/_melee_damage",
25 |   "dmg": "base_damage_/_melee_damage",
26 |   "ele": "electric_damage",
27 |   "electric": "electric_damage",
28 |   "electricity": "electric_damage",
29 |   "heat": "heat_damage",
30 |   "fin": "finisher_damage",
31 |   "finisher": "finisher_damage",
32 |   "rate": "fire_rate_/_attack_speed",
33 |   "fire_rate": "fire_rate_/_attack_speed",
34 |   "attack_speed": "fire_rate_/_attack_speed",
35 |   "speed": "fire_rate_/_attack_speed",
36 |   "proj": "projectile_speed",
37 |   "projectile": "projectile_speed",
38 |   "imp": "impact_damage",
39 |   "impact": "impact_damage",
40 |   "magazine": "magazine_capacity",
41 |   "mag": "magazine_capacity",
42 |   "mag_cap": "magazine_capacity",
43 |   "multishot": "multishot",
44 |   "ms": "multishot",
45 |   "multi": "multishot",
46 |   "toxin": "toxin_damage",
47 |   "tox": "toxin_damage",
48 |   "punch_through": "punch_through",
49 |   "punch": "punch_through",
50 |   "puncture": "puncture_damage",
51 |   "punc": "puncture_damage",
52 |   "reload_speed": "reload_speed",
53 |   "reload": "reload_speed",
54 |   "rs": "reload_speed",
55 |   "range": "range",
56 |   "slash": "slash_damage",
57 |   "status_chance": "status_chance",
58 |   "sc": "status_chance",
59 |   "stat_dur": "status_duration",
60 |   "status_dur": "status_duration",
61 |   "sd": "status_duration",
62 |   "recoil": "recoil",
63 |   "zoom": "zoom",
64 |   "chance_extra_combo": "chance_to_gain_extra_combo_count",
65 |   "chance_combo": "chance_to_gain_combo_count",
66 |   "chance_gain_combo": "chance_to_gain_combo_count",
67 |   "chance_combo_count": "chance_to_gain_combo_count",
68 |   "add": "chance_to_gain_extra_combo_count",
69 |   "additional": "chance_to_gain_extra_combo_count",
70 |   "additional_combo": "chance_to_gain_extra_combo_count",
71 |   "": "<NONE>",
72 |   "<NONE>": "<NONE>"
73 | }


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Warframe Riven Pricer
 2 | 
 3 | This project aims to predict the price of Warframe Rivens using a neural network trained on a comprehensive dataset from the Warframe Marketplace. Below is a detailed guide on setting up, training, and using the model.
 4 | 
 5 | ## Setup
 6 | 
 7 | 0. **Rename Folder**
 8 |    - Rename the Folder from 'WarframeRivenPricer' to 'warframe_marketplace_predictor'.
 9 |    - I hate directories and this part of coding.
10 | 
11 | 1. **Install Dependencies**
12 |    - Ensure you have the necessary packages installed, primarily TensorFlow and essential data science libraries.
13 | 
14 | 2. **Download and Prepare Data**
15 |    - Run the `auto_setup.py` script to download and prepare Warframe Riven data from the Warframe Marketplace. This dataset includes various attributes of Rivens, such as their names and other relevant details.
16 | 
17 | 3. **Make Predictions**
18 |    - Use the `rivens_analysis` script to predict the price of new Rivens based on their attributes.
19 | 
20 | ## How It Works
21 | 
22 | The model uses a neural network to estimate the market price of Rivens based on their attributes and reroll count. By analyzing a large dataset of Rivens and their listing prices, the model predicts how much a new Riven might be worth. 
23 | 
24 | The neural network was extensively tested with various levels of complexity in both its structure and data preparation. It was found that increasing model complexity had minimal impact on performance. Additionally, including detailed attributes such as mastery level, polarity, mod rank, disposition, popularity, and specific Riven values (e.g., crit_chance=97% vs. just crit_chance) did not significantly improve accuracy. 
25 | 
26 | The model provides an estimate of Riven value based on market trends but does not guarantee the actual selling price. The predicted value reflects how similar Rivens are valued by others and may differ from the final selling price. Marketplace prices may often be higher than actual traded prices, reflecting an upward shift in listing prices.
27 | 
28 | ### Key Points
29 | - **Dataset**: Contains approximately 200K Riven entries from the Warframe Marketplace.
30 | - **Prediction**: The model offers a general idea of a Riven’s market value. It cannot predict the exact selling price but provides an estimate based on market listings.
31 | - **Limitations**: The model may not account for every factor influencing Riven prices, such as specific attributes or nuances that affect actual trade values.
32 | 
33 | ## Insights and Future Plans
34 | 
35 | 1. **Value Prediction (DONE)**: The model could be expanded to predict the value of every possible Riven, offering a probability distribution over potential prices and insights into investment potential.
36 | ![](https://i.imgur.com/UJKjYV4.png)
37 | 
38 | 2. **Outlier Detection**: Applying the model to the entire dataset may help identify undervalued or overvalued Rivens, highlighting potential bargains or overpriced items.
39 | 
40 | 3. **Bias Correction**: Future work will focus on addressing model biases. One approach involves utilizing summary statistics of actually traded Rivens provided by the developers. By applying gradient descent (or similar techniques) to our predicted price distribution, we can adjust and shift it towards these true summary statistics. This method maps each value in the original distribution to a new value, forming a distribution that more closely aligns with the actual traded distribution.
41 | 
42 | Feel free to contribute to the project or reach out if you have any questions or suggestions!
43 | 


--------------------------------------------------------------------------------
/tool_setup_and_maintenance/auto_setup.py:
--------------------------------------------------------------------------------
 1 | from warframe_marketplace_predictor import filepaths
 2 | from warframe_marketplace_predictor.tool_setup_and_maintenance import setup_weapon_information, download_data, \
 3 |     create_marketplace_dataframe
 4 | from warframe_marketplace_predictor.training.trainers import train_price_model
 5 | 
 6 | 
 7 | def main(run_full_pipeline=False,
 8 |          download_onwards=False,
 9 |          dataframe_onwards=False,
10 |          train_onwards=False,
11 |          weapon_ranks_onwards=False,
12 |          overwrite_marketplace=False):
13 |     def run_pipeline(verify_file_paths=False,
14 |                      download_data_files=False,
15 |                      create_dataframe=False,
16 |                      train_model=False,
17 |                      setup_weapon_ranks=False):
18 |         if verify_file_paths:
19 |             print("Verifying file paths...")
20 |             filepaths.main()
21 |             print("File paths verified.")
22 | 
23 |         if download_data_files:
24 |             print("Downloading all data... (This may take approximately 15 minutes)")
25 |             if overwrite_marketplace is True:
26 |                 if (input("WARNING: You are about to delete and replace your marketplace data. Type 'YES' to confirm ")
27 |                         == "YES"):
28 |                     download_data.main(running_all=True, overwrite_marketplace_data=True)
29 |             else:
30 |                 download_data.main(running_all=True, overwrite_marketplace_data=False)
31 |             print("Marketplace data downloaded successfully.")
32 | 
33 |         if create_dataframe:
34 |             print("Creating training dataframe...")
35 |             create_marketplace_dataframe.main()
36 |             print("Dataframe created successfully.")
37 | 
38 |         if train_model:
39 |             print("Training the model... (This may take approximately 5 minutes)")
40 |             train_price_model.train_model(show_graph=False)
41 |             print("Model training completed.")
42 | 
43 |         if setup_weapon_ranks:
44 |             print("Setting up weapon ranks... (This may take approximately 20 minutes)")
45 |             setup_weapon_information.main()
46 |             print("Weapon ranks setup complete.")
47 | 
48 |     # Define scenarios based on input flags
49 |     if run_full_pipeline:
50 |         run_pipeline(verify_file_paths=True,
51 |                      download_data_files=True,
52 |                      create_dataframe=True,
53 |                      train_model=True,
54 |                      setup_weapon_ranks=True)
55 |     elif download_onwards:
56 |         run_pipeline(download_data_files=True, create_dataframe=True, train_model=True, setup_weapon_ranks=True)
57 |     elif dataframe_onwards:
58 |         run_pipeline(create_dataframe=True, train_model=True, setup_weapon_ranks=True)
59 |     elif train_onwards:
60 |         run_pipeline(train_model=True, setup_weapon_ranks=True)
61 |     elif weapon_ranks_onwards:
62 |         run_pipeline(setup_weapon_ranks=True)
63 |     else:
64 |         print("No valid scenario selected.")
65 |         return
66 | 
67 |     print("Setup complete. You may now navigate to 'rivens_analysis',"
68 |           " scroll to the bottom, input your rivens, and run the script.")
69 | 
70 | 
71 | if __name__ == "__main__":
72 |     main(
73 |         run_full_pipeline=True,
74 | 
75 |         download_onwards=False,
76 |         dataframe_onwards=False,
77 |         train_onwards=False,
78 |         weapon_ranks_onwards=False,
79 | 
80 |         overwrite_marketplace=True
81 |     )
82 | 


--------------------------------------------------------------------------------
/riven_tool/rivens_analysis.py:
--------------------------------------------------------------------------------
  1 | from warframe_marketplace_predictor.shtuff import riven_funcs
  2 | 
  3 | if __name__ == "__main__":
  4 |     rivens = [
  5 |         {
  6 |             "name": "Dual Ichor",
  7 |             "positives": ["sc", "tox", "ccs"],
  8 |             "negatives": ["fin"],
  9 |             "re_rolls": 9
 10 |         },
 11 | 
 12 |         {
 13 |             "name": "Acceltra",
 14 |             "positives": ["dmg", "ms", "ele"],
 15 |             "negatives": ["ammo"],
 16 |             "re_rolls": 9
 17 |         },
 18 |         {
 19 |             "name": "Afentis",
 20 |             "positives": ["ms", "dmg", "mag"],
 21 |             "negatives": [""],
 22 |             "re_rolls": 9
 23 |         },
 24 |         {
 25 |             "name": "Anku",
 26 |             "positives": ["sc", "dmg", "cd"],
 27 |             "negatives": ["heavy"],
 28 |             "re_rolls": 9
 29 |         },
 30 |         {
 31 |             "name": "Caustacyst",
 32 |             "positives": ["ele", "cc", "cd"],
 33 |             "negatives": ["sc"],
 34 |             "re_rolls": 9
 35 |         },
 36 |         {
 37 |             "name": "Convectrix",
 38 |             "positives": ["ms", "ele", "dmg"],
 39 |             "negatives": ["slash"],
 40 |             "re_rolls": 9
 41 |         },
 42 |         {
 43 |             "name": "Dark Split-Sword",
 44 |             "positives": ["range", "ele", ""],
 45 |             "negatives": ["ccs"],
 46 |             "re_rolls": 9
 47 |         },
 48 |         {
 49 |             "name": "Dual Ichor",
 50 |             "positives": ["cc", "range", ""],
 51 |             "negatives": ["stat_dur"],
 52 |             "re_rolls": 9
 53 |         },
 54 |         {
 55 |             "name": "Dual Toxocyst",
 56 |             "positives": ["cc", "tox", "punch"],
 57 |             "negatives": ["corp"],
 58 |             "re_rolls": 9
 59 |         },
 60 |         {
 61 |             "name": "Furax",
 62 |             "positives": ["speed", "range", ""],
 63 |             "negatives": ["heavy"],
 64 |             "re_rolls": 9
 65 |         },
 66 |         {
 67 |             "name": "Nukor",
 68 |             "positives": ["mag", "ms", "tox"],
 69 |             "negatives": ["inf"],
 70 |             "re_rolls": 9
 71 |         },
 72 |         {
 73 |             "name": "Phenmor",
 74 |             "positives": ["mag", "ms", "proj"],
 75 |             "negatives": [""],
 76 |             "re_rolls": 9
 77 |         },
 78 |         {
 79 |             "name": "Praedos",
 80 |             "positives": ["cc", "slash", "dmg"],
 81 |             "negatives": [""],
 82 |             "re_rolls": 9
 83 |         },
 84 |         {
 85 |             "name": "Rubico",
 86 |             "positives": ["cd", "cold", "tox"],
 87 |             "negatives": ["zoom"],
 88 |             "re_rolls": 3
 89 |         },
 90 |         {
 91 |             "name": "Sporothrix",
 92 |             "positives": ["ms", "tox", "mag"],
 93 |             "negatives": [""],
 94 |             "re_rolls": 9
 95 |         },
 96 |         {
 97 |             "name": "Sybaris",
 98 |             "positives": ["speed", "cc", "dmg"],
 99 |             "negatives": ["grin"],
100 |             "re_rolls": 9
101 |         },
102 |         {
103 |             "name": "Tenora",
104 |             "positives": ["cd", "grin", "tox"],
105 |             "negatives": ["sd"],
106 |             "re_rolls": 3
107 |         },
108 |         {
109 |             "name": "Zenith",
110 |             "positives": ["cc", "cd", "ms"],
111 |             "negatives": ["ammo"],
112 |             "re_rolls": 4
113 |         },
114 | 
115 |     ]
116 | 
117 |     riven_funcs.analyze_rivens(rivens)
118 | 


--------------------------------------------------------------------------------
/filepaths.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | 
  3 | # Get the absolute path to the directory containing this file
  4 | base_dir = os.path.dirname(os.path.abspath(__file__))
  5 | 
  6 | # Define the folder names
  7 | data_folder_name = "data_files"
  8 | training_folder_name = "training"
  9 | model_data_folder_name = "model_data"
 10 | sub_models_folder_name = "sub_models"
 11 | 
 12 | # Downloaded data
 13 | items_data_file_path = os.path.join(base_dir, data_folder_name, "items_data.json")
 14 | attributes_data_file_path = os.path.join(base_dir, data_folder_name, "attributes_data.json")
 15 | attribute_name_shortcuts_file_path = os.path.join(base_dir, data_folder_name, "attribute_name_shortcuts.json")
 16 | raw_marketplace_data_file_path = os.path.join(base_dir, data_folder_name, "raw_marketplace_data.json")
 17 | developer_summary_stats_file_path = os.path.join(base_dir, data_folder_name, "developer_summary_stats.json")
 18 | ig_weapon_stats_file_path = os.path.join(base_dir, data_folder_name, "ig_weapon_stats.json")
 19 | 
 20 | # Generated data
 21 | marketplace_dataframe_file_path = os.path.join(base_dir, training_folder_name, model_data_folder_name,
 22 |                                                "marketplace_dataframe.csv")
 23 | 
 24 | # -- Regular Model Paths --
 25 | price_model_model_file_path = os.path.join(base_dir, training_folder_name, model_data_folder_name,
 26 |                                            "price_model.h5")
 27 | price_model_preprocessor_file_path = os.path.join(base_dir, training_folder_name, model_data_folder_name,
 28 |                                                   "price_preprocessor.pkl")
 29 | price_model_kmeans_file_path = os.path.join(base_dir, training_folder_name, model_data_folder_name, "price_kmeans.pkl")
 30 | 
 31 | # -- Mixture of Experts Model Paths --
 32 | # moe_price_sub_models_model_directory = os.path.join(base_dir, training_folder_name, sub_models_folder_name)
 33 | # moe_price_sub_models_preprocessor_directory = os.path.join(base_dir, training_folder_name, sub_models_folder_name)
 34 | # moe_price_model_model_file_path = os.path.join(base_dir, training_folder_name, model_data_folder_name,
 35 | #                                                "moe_price_model.h5")
 36 | # moe_price_model_preprocessor_file_path = os.path.join(base_dir, training_folder_name, model_data_folder_name,
 37 | #                                                       "moe_price_preprocessor.pkl")
 38 | 
 39 | # -- Liquidity Model Paths --
 40 | liquidity_model_model_file_path = os.path.join(base_dir, training_folder_name, model_data_folder_name,
 41 |                                                "liquidity_model.h5")
 42 | liquidity_model_preprocessor_file_path = os.path.join(base_dir, training_folder_name, model_data_folder_name,
 43 |                                                       "liquidity_preprocessor.pkl")
 44 | 
 45 | # -- Weapon Information Paths --
 46 | weapon_ranking_information_file_path = os.path.join(base_dir, data_folder_name,
 47 |                                                     "weapon_ranking_information.json")
 48 | global_price_freq_file_path = os.path.join(base_dir, data_folder_name,
 49 |                                            "global_price_freq.json")
 50 | 
 51 | public_spreadsheet_file_path = os.path.join(base_dir, data_folder_name,
 52 |                                             "public_spreadsheet.csv")
 53 | 
 54 | 
 55 | def create_files_if_not_exist(paths):
 56 |     for path in paths:
 57 |         # Ensure the directory exists
 58 |         dir_name = os.path.dirname(path)
 59 |         if not os.path.exists(dir_name):
 60 |             os.makedirs(dir_name)
 61 |             print(f"Created directory: {dir_name}")
 62 | 
 63 |         # Check if the file already exists
 64 |         if not os.path.isfile(path):
 65 |             # Create the file if it doesn't exist
 66 |             with open(path, "w"):
 67 |                 pass
 68 |             print(f"Created empty file: {path}")
 69 |         else:
 70 |             print(f"File already exists: {path}")
 71 | 
 72 | 
 73 | def create_directories(directories):
 74 |     for directory in directories:
 75 |         if not os.path.exists(directory):
 76 |             os.makedirs(directory)
 77 |             print(f"Created directory: {directory}")
 78 |         else:
 79 |             print(f"Directory already exists: {directory}")
 80 | 
 81 | 
 82 | def main():
 83 |     # List of file paths to check and create if they don't exist
 84 |     file_paths = [
 85 |         items_data_file_path,
 86 |         attributes_data_file_path,
 87 |         attribute_name_shortcuts_file_path,
 88 |         raw_marketplace_data_file_path,
 89 |         developer_summary_stats_file_path,
 90 |         ig_weapon_stats_file_path,
 91 | 
 92 |         marketplace_dataframe_file_path,
 93 | 
 94 |         price_model_model_file_path,  # genius naming scheme
 95 |         price_model_preprocessor_file_path,
 96 |         price_model_kmeans_file_path,
 97 | 
 98 |         # moe_price_model_model_file_path,
 99 |         # moe_price_model_preprocessor_file_path,
100 | 
101 |         weapon_ranking_information_file_path,
102 |         global_price_freq_file_path,
103 | 
104 |         public_spreadsheet_file_path
105 |     ]
106 | 
107 |     # List of directories to ensure exist
108 |     directories = [
109 |         # moe_price_sub_models_model_directory,
110 |         # moe_price_sub_models_preprocessor_directory
111 |     ]
112 | 
113 |     # Create directories first
114 |     create_directories(directories)
115 | 
116 |     # Call the function to create files
117 |     create_files_if_not_exist(file_paths)
118 | 
119 | 
120 | if __name__ == "__main__":
121 |     main()
122 | 


--------------------------------------------------------------------------------
/shtuff/WIP_bias_adjustor.py:
--------------------------------------------------------------------------------
  1 | """
  2 | IN-PROGRESS
  3 | 
  4 | The method by which we adjust bias needs to both be efficient and have some
  5 | statistical science as to why it is a valid adjustor.
  6 | """
  7 | import collections
  8 | from typing import Dict, Union
  9 | 
 10 | import matplotlib.pyplot as plt
 11 | import numpy as np
 12 | import pandas as pd
 13 | import seaborn as sns
 14 | import tqdm
 15 | 
 16 | 
 17 | def diagnose_transformation(original_values: np.ndarray,
 18 |                             transformed_values: np.ndarray,
 19 |                             target_summary_statistics: Dict[str, Union[float, int]]):
 20 |     """
 21 |     Visualizes the original and transformed distributions and prints summary statistics.
 22 | 
 23 |     Parameters:
 24 |     - original_values: The original predicted listing prices.
 25 |     - transformed_values: The transformed values after optimization.
 26 |     - summary_statistics: The target summary statistics for the traded prices.
 27 |     """
 28 |     # Compute original and new summary statistics
 29 |     original_stats = calculate_summary_statistics(original_values)
 30 |     new_stats = calculate_summary_statistics(transformed_values)
 31 | 
 32 |     # Print the summary statistics
 33 |     print("Original Summary Statistics:")
 34 |     for key, value in original_stats.items():
 35 |         print(f"  {key}: {value:.4f}")
 36 | 
 37 |     print("\nTarget Summary Statistics (Given):")
 38 |     for key, value in target_summary_statistics.items():
 39 |         print(f"  {key}: {value:.4f}")
 40 | 
 41 |     print("\nNew Summary Statistics (After Transformation):")
 42 |     for key, value in new_stats.items():
 43 |         print(f"  {key}: {value:.4f}")
 44 | 
 45 |     # Create a plot to visualize the distributions
 46 |     plt.figure(figsize=(10, 6))
 47 | 
 48 |     sns.kdeplot(original_values, color="blue", label="Original Distribution", linewidth=2, bw_adjust=0.1)
 49 |     sns.kdeplot(transformed_values, color="red", label="Transformed Distribution", linewidth=2, bw_adjust=0.1)
 50 | 
 51 |     plt.title("Comparison of Original and Transformed Distributions")
 52 |     plt.xlabel("Values")
 53 |     plt.ylabel("Density")
 54 |     plt.legend()
 55 | 
 56 |     plt.show()
 57 | 
 58 | 
 59 | def calculate_summary_statistics(values: np.ndarray) -> Dict[str, float]:
 60 |     return {
 61 |         "avg": values.mean(),
 62 |         "stddev": values.std(),
 63 |         "median": np.median(values),
 64 |         "min": values.min(),
 65 |         "max": values.max()
 66 |     }
 67 | 
 68 | 
 69 | def min_max_scale(values: np.ndarray, min_: float = None, max_: float = None) -> np.ndarray:
 70 |     values_min = values.min()
 71 |     values_max = values.max()
 72 |     values_std = (values - values_min) / (values_max - values_min)
 73 |     values_scaled = values_std * (max_ - min_) + min_
 74 |     return values_scaled
 75 | 
 76 | 
 77 | def gaussian_kernel(size: int, sigma: float = 1) -> np.ndarray:
 78 |     """Returns a 1D Gaussian kernel of length 'size' with standard deviation 'sigma'."""
 79 |     if size % 2 == 0:
 80 |         raise ValueError("Length of the Gaussian kernel must be odd.")
 81 |     center = size // 2
 82 |     x = np.arange(0, size) - center
 83 |     kernel = np.exp(-(x ** 2) / (2 * sigma ** 2))
 84 |     kernel /= np.sum(kernel)  # Normalize the kernel so that the sum is 1
 85 |     return kernel
 86 | 
 87 | 
 88 | def calculate_loss(values: np.ndarray, target_summary_statistics: Dict[str, float]) -> float:
 89 |     summ_stats = calculate_summary_statistics(values)
 90 |     loss = 0
 91 |     for metric in ["avg", "stddev"]:
 92 |         loss += (summ_stats[metric] - target_summary_statistics[metric]) ** 2
 93 |     return loss
 94 | 
 95 | 
 96 | def adjust_values(values: pd.Series, target_summary_statistics: Dict[str, float]) -> np.ndarray:
 97 |     """Adjust values using gradient descent and smooth updates with a Gaussian kernel."""
 98 | 
 99 |     values = np.array(values)
100 | 
101 |     # Scale the initial values to match the target min/max range
102 |     new_values = min_max_scale(values, target_summary_statistics["min"], target_summary_statistics["max"])
103 | 
104 |     # Create Gaussian kernel
105 |     kernel = gaussian_kernel(11, sigma=1)
106 | 
107 |     # Hyperparameters
108 |     max_iter = 1000
109 |     learning_rate = 1
110 | 
111 |     pbar = tqdm.tqdm(range(max_iter))
112 |     for _ in pbar:
113 |         orig_loss = calculate_loss(new_values, target_summary_statistics)
114 | 
115 |         # For each value, compute its impact on the loss
116 |         gradient = np.zeros_like(new_values)
117 | 
118 |         for i in range(len(new_values)):
119 |             # Small step forward
120 |             new_values[i] += learning_rate
121 |             loss_increase = calculate_loss(new_values, target_summary_statistics)
122 | 
123 |             # Compute gradient (difference in loss)
124 |             gradient[i] = (loss_increase - orig_loss) / learning_rate
125 | 
126 |             # Revert the change
127 |             new_values[i] -= learning_rate
128 | 
129 |         # Update all values using the gradient and smooth it with Gaussian kernel
130 |         smoothed_gradient = np.convolve(gradient, kernel, mode='same')
131 |         new_values -= learning_rate * smoothed_gradient
132 |         new_values = min_max_scale(new_values, target_summary_statistics["min"], target_summary_statistics["max"])
133 | 
134 |         # Check for convergence (optional stopping criteria)
135 |         loss = np.abs(orig_loss - calculate_loss(new_values, target_summary_statistics))
136 |         pbar.set_postfix(loss=orig_loss)
137 |         if loss < 1e-6:
138 |             break
139 | 
140 |     print("Final Loss:", calculate_loss(new_values, target_summary_statistics))
141 |     d = collections.defaultdict(list)
142 |     for v1, v2 in zip(values, new_values):
143 |         d[v1].append(v2)
144 |     for v1, v2l in d.items():
145 |         print(v1, v2l)
146 |     diagnose_transformation(values, new_values, target_summary_statistics)
147 |     quit()
148 | 
149 |     return new_values
150 | 


--------------------------------------------------------------------------------
/shtuff/make_prediction.py:
--------------------------------------------------------------------------------
  1 | from typing import Dict, Any, Union, Iterable
  2 | 
  3 | import numpy as np
  4 | import pandas as pd
  5 | import tensorflow as tf
  6 | from tqdm import tqdm
  7 | 
  8 | from warframe_marketplace_predictor.filepaths import *
  9 | from warframe_marketplace_predictor.shtuff.data_handler import DataHandler
 10 | from warframe_marketplace_predictor.training.preprocessors.price_model_preprocessor import Preprocessor
 11 | 
 12 | 
 13 | class PricePredictor:
 14 |     def __init__(self, model_predict_batch_size: int = 256):
 15 |         """
 16 |         Initializes the PricePredictor class, loading the preprocessor, model,
 17 |         attribute name shortcuts, and item name to URL mapping only once.
 18 |         """
 19 |         self.model_predict_batch_size = model_predict_batch_size
 20 |         self.data_handler = DataHandler()
 21 | 
 22 |         # Load the preprocessor and model once
 23 |         self.preprocessor = Preprocessor().load()
 24 |         self.model: tf.keras.Model = tf.keras.models.load_model(price_model_model_file_path)
 25 |         self._mask_token = "<NONE>"
 26 | 
 27 |     def is_valid(self, item: Dict[str, Any]) -> bool:
 28 |         """
 29 |         Checks if the provided attribute names (positives + negatives) are valid by comparing them against the shortcuts.
 30 | 
 31 |         Args:
 32 |             item (Dict[str, Any]): A dictionary containing the item's "positives" and "negatives" attributes.
 33 | 
 34 |         Returns:
 35 |             bool: True if all attribute names are valid, False otherwise.
 36 |         """
 37 |         if not self.data_handler.weapon_exists(item["name"]):
 38 |             print(f"{item['name']} is not a valid weapon name")
 39 |             print("Name suggestions:")
 40 |             print([k for k in sorted(self.data_handler.get_item_names())
 41 |                    if k and item["name"] and (k[0]).lower() == (item["name"][0]).lower()])
 42 |             return False
 43 | 
 44 |         if "re_rolls" in item:
 45 |             if not isinstance(item["re_rolls"], int):
 46 |                 print("'re_rolls' must be an integer.")
 47 |                 return False
 48 |             item["re_rolled"] = item["re_rolls"] > 0
 49 | 
 50 |         if "re_rolled" not in item or not isinstance(item["re_rolled"], bool):
 51 |             print("'re_rolled' is missing or incorrectly formatted.")
 52 |             return False
 53 | 
 54 |         # Combine the positives and negatives from the item to validate
 55 |         attribute_names = item["positives"] + item["negatives"]
 56 |         for attribute_name in attribute_names:
 57 |             if not self.data_handler.is_valid_attribute_shortcut(attribute_name):
 58 |                 print(f"{attribute_name} is not a valid attribute.")
 59 |                 print("Did you mean:")
 60 |                 print([k for k in sorted(self.data_handler.get_attribute_shortcuts())
 61 |                        if k and attribute_name and k[0] == attribute_name[0]])
 62 |                 return False
 63 | 
 64 |         return True
 65 | 
 66 |     def prepare(self, item: Dict[str, Any]) -> Dict[str, Any]:
 67 |         res = {
 68 |             "weapon_url_name": self.data_handler.get_url_name(item["name"]),
 69 |             "positive1": item["positives"][0] if len(item["positives"]) >= 1 else self._mask_token,
 70 |             "positive2": item["positives"][1] if len(item["positives"]) >= 2 else self._mask_token,
 71 |             "positive3": item["positives"][2] if len(item["positives"]) >= 3 else self._mask_token,
 72 |             "negative": item["negatives"][0] if len(item["negatives"]) >= 1 else self._mask_token,
 73 |             "re_rolled": item["re_rolled"],
 74 |             # "group": self.data_handler.get_weapon_group(self.data_handler.get_url_name(item["name"])),
 75 |             # "has_incarnon": self.data_handler.weapon_has_incarnon(self.data_handler.get_url_name(item["name"])),
 76 |             # "avg_trade_price": self.data_handler.get_average_trade_price(
 77 |             #     self.data_handler.get_url_name(item["name"]), rolled_status="rerolled"),
 78 |             # "disposition": self.data_handler.get_disposition(self.data_handler.get_url_name(item["name"])),
 79 |         }
 80 |         return res
 81 | 
 82 |     def get_prepared_data(self, data: Iterable[Dict[str, Any]], skip_validation: bool, verbose: bool) -> pd.DataFrame:
 83 |         # If data is a single dictionary, wrap it in a list for consistent processing
 84 |         if isinstance(data, dict):
 85 |             data = [data]
 86 | 
 87 |         prepared_data = []
 88 | 
 89 |         iterator = tqdm(data, desc="Preparing data", unit="riven") if verbose else data
 90 |         for item in iterator:
 91 | 
 92 |             # Check for invalid names or shortcuts
 93 |             if not skip_validation and not self.is_valid(item):
 94 |                 return pd.DataFrame([])
 95 | 
 96 |             row = self.prepare(item)
 97 | 
 98 |             if not skip_validation:
 99 |                 row["positive1"] = self.data_handler.get_proper_attribute_name(row["positive1"])
100 |                 row["positive2"] = self.data_handler.get_proper_attribute_name(row["positive2"])
101 |                 row["positive3"] = self.data_handler.get_proper_attribute_name(row["positive3"])
102 |                 row["negative"] = self.data_handler.get_proper_attribute_name(row["negative"])
103 | 
104 |             prepared_data.append(row)
105 | 
106 |         prepared_data = pd.DataFrame(prepared_data)
107 |         return prepared_data
108 | 
109 |     def predict(self,
110 |                 data: Union[Iterable[Dict[str, Any]], Dict[str, Any], pd.DataFrame],
111 |                 verbose: bool = True,
112 |                 skip_validation: bool = False, raw: bool = False) -> Union[np.ndarray, np.float32]:
113 |         """
114 |         Predicts outcomes based on the provided input data using a pre-trained model.
115 |         """
116 | 
117 |         if single_entry_flag := isinstance(data, dict):
118 |             data = [data]
119 | 
120 |         if not raw:
121 |             data = self.get_prepared_data(data, skip_validation, verbose)
122 | 
123 |         model_ready_data = self.preprocessor.transform(data)
124 |         predictions = self.model.predict(model_ready_data,
125 |                                          batch_size=self.model_predict_batch_size,
126 |                                          verbose=verbose).reshape(-1)
127 | 
128 |         if raw:
129 |             return predictions
130 |         elif single_entry_flag:
131 |             return np.expm1(predictions)[0]
132 |         else:
133 |             return np.expm1(predictions)
134 | 
135 | 
136 | def main():
137 |     # Examples
138 |     rivens = [
139 |         {
140 |             "name": "Verglas",
141 |             "positives": ["dmg", "cc", ""],
142 |             "negatives": [""],
143 |             "re_rolled": True
144 |         },
145 |         {
146 |             "name": "Verglas",
147 |             "positives": ["imp", "corp", ""],
148 |             "negatives": [""],
149 |             "re_rolled": True
150 |         },
151 |         {
152 |             "name": "Verglas",
153 |             "positives": ["ms", "cd", ""],
154 |             "negatives": [""],
155 |             "re_rolled": True
156 |         },
157 |     ]
158 | 
159 |     predictor = PricePredictor()
160 |     predictions = predictor.predict(rivens)
161 | 
162 |     for riven, prediction in zip(rivens, predictions):
163 |         print(f"{riven['name']} riven is estimated to be listed at {prediction:.0f} platinum")
164 | 
165 | 
166 | if __name__ == "__main__":
167 |     main()
168 | 


--------------------------------------------------------------------------------
/tool_setup_and_maintenance/download_data.py:
--------------------------------------------------------------------------------
  1 | import datetime
  2 | import time
  3 | from typing import Dict
  4 | 
  5 | import requests
  6 | import tqdm
  7 | 
  8 | from warframe_marketplace_predictor.filepaths import *
  9 | from warframe_marketplace_predictor.shtuff.data_handler import DataHandler
 10 | from warframe_marketplace_predictor.shtuff.storage_handling import save_json, read_json
 11 | 
 12 | 
 13 | # If anything breaks, surely it was a cosmic bit flip.
 14 | 
 15 | 
 16 | def fetch_data(url: str, delay: float = 0.1) -> Dict:
 17 |     """
 18 |     Fetches data from a given URL, with retries in case of rate limiting or failure.
 19 | 
 20 |     Args:
 21 |         url (str): The API endpoint to fetch data from.
 22 |         delay (float): The delay in seconds before retrying on rate limits. Defaults to 0.1.
 23 | 
 24 |     Returns:
 25 |         Dict: JSON data fetched from the API or an empty dictionary in case of an error.
 26 |     """
 27 |     if delay >= 3.0:
 28 |         print(f"Delay limit reached. Aborting {url}")
 29 |         return dict()
 30 | 
 31 |     try:
 32 |         response = requests.get(url, headers={"accept": "application/json"})
 33 |         # Handle rate-limiting (status code 429)
 34 |         if response.status_code == 429:  # Too Many Requests
 35 |             print("Rate limited. Retrying...")
 36 |             time.sleep(delay)
 37 |             return fetch_data(url, min(60.0, delay * 2))
 38 | 
 39 |         # Raise an exception for other HTTP errors
 40 |         response.raise_for_status()
 41 |         return response.json()
 42 | 
 43 |     except requests.exceptions.HTTPError as err:
 44 |         print(f"HTTP error occurred: {err}")
 45 |     except requests.exceptions.RequestException as err:
 46 |         print(f"Error occurred: {err}")
 47 | 
 48 |     return dict()  # Fallback in case of any error
 49 | 
 50 | 
 51 | def download_items_data(the_url: str = "https://api.warframe.market/v1/riven/items") -> None:
 52 |     """
 53 |     Downloads item data from the API and saves mappings between item names and their URL representations.
 54 | 
 55 |     Args:
 56 |         the_url (str): The API endpoint to fetch item data from. Defaults to Warframe Riven items API.
 57 |     """
 58 |     items_data = fetch_data(the_url)["payload"]["items"]
 59 |     item_name_items_data = {x["item_name"]: x for x in items_data}
 60 |     save_json(items_data_file_path, item_name_items_data)
 61 | 
 62 |     print("Downloaded and saved items data. \n")
 63 | 
 64 | 
 65 | def download_attributes_data(the_url: str = "https://api.warframe.market/v1/riven/attributes") -> None:
 66 |     """
 67 |     Downloads attribute data from the API and saves it.
 68 | 
 69 |     Args:
 70 |         the_url (str): The API endpoint to fetch attribute data from. Defaults to Warframe Riven attributes API.
 71 |     """
 72 |     attributes_data = fetch_data(the_url)["payload"]["attributes"]
 73 |     attributes_data_mapped = {x["url_name"]: x for x in attributes_data if x["url_name"] not in ["has", "none"]}
 74 |     save_json(attributes_data_file_path, attributes_data_mapped)
 75 | 
 76 |     print("Downloaded and saved attributes data.\n")
 77 | 
 78 | 
 79 | def download_marketplace_database(overwrite: bool = True) -> None:
 80 |     """
 81 |     Downloads marketplace data and saves the raw data to a file.
 82 | 
 83 |     Args:
 84 |         overwrite (bool): If True, it downloads a fresh batch. If False, will update and append to existing data.
 85 |     """
 86 |     if overwrite:
 87 |         if (input("WARNING: You are about to delete and replace your marketplace data. Type 'YES' to confirm ")
 88 |                 != "YES"):
 89 |             return
 90 |         auctions_data = dict()
 91 |         original_length = 0
 92 |     else:
 93 |         auctions = read_json(raw_marketplace_data_file_path)
 94 |         auctions_data = {auction["id"]: auction for auction in auctions}
 95 |         original_length = len(auctions_data)
 96 | 
 97 |     captured_date = datetime.date.today().isoformat()
 98 |     weapon_url_names = DataHandler().get_url_names()
 99 | 
100 |     price_orderings = ["price_asc", "price_desc"]
101 |     pbar = tqdm.tqdm(weapon_url_names, "Fetching Marketplace Data", unit="weapon")
102 |     for weapon_name in pbar:
103 |         pbar.set_postfix(weapon=weapon_name, added=len(auctions_data) - original_length)
104 |         for price_ordering in price_orderings:
105 |             the_url = f"https://api.warframe.market/v1/auctions/search?type=riven"
106 |             the_url += f"&weapon_url_name={weapon_name}"
107 |             the_url += f"&sort_by={price_ordering}"
108 |             try:
109 |                 auctions = fetch_data(the_url)["payload"]["auctions"]
110 |             except KeyError as e:
111 |                 print(e)
112 |                 print(f"Skipping {weapon_name}_{price_ordering}...")
113 |                 continue
114 |             for auction in auctions:
115 |                 auction["captured_date"] = captured_date  # Add the date to each auction
116 |             id_auctions = {auction["id"]: auction for auction in auctions}
117 |             auctions_data.update(id_auctions)
118 | 
119 |     auctions_data = list(auctions_data.values())
120 |     save_json(raw_marketplace_data_file_path, auctions_data)
121 | 
122 |     print("Marketplace data saved.")
123 |     print(f"{len(auctions_data)} total entries.\n")
124 | 
125 | 
126 | def download_developer_riven_summary_stats(the_url: str = "https://api.warframestat.us/pc/rivens"):
127 |     """
128 |     Downloads and processes summary statistics for traded Rivens from the provided API, then saves the data.
129 |     The data is organized into a dictionary with weapon names as keys and a dictionary containing rolled, unrolled,
130 |     and combined statistics as values.
131 | 
132 |     Args:
133 |         the_url (str): The URL of the API endpoint to retrieve Riven statistics. Defaults to the official
134 |                        Warframe Rivens API for the PC platform.
135 |     """
136 |     # Fetch Riven summary statistics data from the API.
137 |     riven_stats_data = fetch_data(the_url)
138 | 
139 |     # Save the reformatted Riven statistics to a JSON file.
140 |     save_json(developer_summary_stats_file_path, riven_stats_data)
141 | 
142 |     print("Downloaded and saved Riven summary statistics.\n")
143 | 
144 | 
145 | def download_ingame_weapon_stats(the_url: str = "https://api.warframestat.us/weapons"):
146 |     # Fetch weapon statistics data from the API.
147 |     ig_weapon_stats = fetch_data(the_url)
148 | 
149 |     ig_data = dict()
150 |     for weapon in ig_weapon_stats:
151 |         name = weapon["name"]
152 |         undesired_keys = ["patchlogs", "components"]
153 |         weapon = {k: weapon[k] for k in sorted(weapon.keys()) if k not in undesired_keys}
154 |         ig_data[name] = weapon
155 | 
156 |     save_json(ig_weapon_stats_file_path, ig_data)
157 | 
158 |     print("Downloaded and saved in-game weapon stats.\n")
159 | 
160 | 
161 | def main(running_all: bool = False, overwrite_marketplace_data: bool = False):
162 |     """
163 |     Downloads all the data you'll need from the interweb.
164 |     """
165 |     running = [
166 |         {"run": True, "func": download_items_data},
167 |         {"run": True, "func": download_attributes_data},
168 |         {"run": True, "func": lambda: download_marketplace_database(overwrite=overwrite_marketplace_data)},
169 |         {"run": True, "func": download_developer_riven_summary_stats},
170 |         {"run": True, "func": download_ingame_weapon_stats},
171 |     ]
172 | 
173 |     for action in running:
174 |         if running_all or action["run"]:
175 |             action["func"]()
176 | 
177 | 
178 | if __name__ == "__main__":
179 |     main()
180 | 


--------------------------------------------------------------------------------
/training/trainers/train_liquidity_model.py:
--------------------------------------------------------------------------------
  1 | import matplotlib.pyplot as plt
  2 | import pandas as pd
  3 | import tensorflow as tf
  4 | from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_curve, auc
  5 | from sklearn.model_selection import train_test_split
  6 | from sklearn.utils import resample
  7 | 
  8 | from warframe_marketplace_predictor.filepaths import *
  9 | from warframe_marketplace_predictor.training.preprocessors.liquidity_model_preprocessor import Preprocessor, \
 10 |     get_model_architecture
 11 | 
 12 | 
 13 | def plot_classification_performance(y_test, y_test_pred_proba, threshold=0.5):
 14 |     # Convert predicted probabilities to binary predictions based on the threshold
 15 |     y_test_pred = (y_test_pred_proba >= threshold).astype(int)
 16 | 
 17 |     # Calculate classification performance metrics
 18 |     accuracy = accuracy_score(y_test, y_test_pred)
 19 |     precision = precision_score(y_test, y_test_pred)
 20 |     recall = recall_score(y_test, y_test_pred)
 21 |     f1 = f1_score(y_test, y_test_pred)
 22 | 
 23 |     # Print the metrics
 24 |     print(f"Accuracy: {accuracy:.4f}")
 25 |     print(f"Precision: {precision:.4f}")
 26 |     print(f"Recall: {recall:.4f}")
 27 |     print(f"F1 Score: {f1:.4f}")
 28 | 
 29 |     # Confusion Matrix
 30 |     cm = confusion_matrix(y_test, y_test_pred)
 31 |     print(f"Confusion Matrix:\n{cm}")
 32 | 
 33 |     # ROC Curve
 34 |     fpr, tpr, _ = roc_curve(y_test, y_test_pred_proba)
 35 |     roc_auc = auc(fpr, tpr)
 36 | 
 37 |     plt.figure(figsize=(10, 6))
 38 |     plt.plot(fpr, tpr, color='blue', lw=2, label=f'ROC Curve (AUC = {roc_auc:.4f})')
 39 |     plt.plot([0, 1], [0, 1], color='red', linestyle='--', lw=2)
 40 |     plt.xlim([0.0, 1.0])
 41 |     plt.ylim([0.0, 1.05])
 42 |     plt.xlabel('False Positive Rate')
 43 |     plt.ylabel('True Positive Rate')
 44 |     plt.title('Receiver Operating Characteristic (ROC) Curve')
 45 |     plt.legend(loc="lower right")
 46 |     plt.grid(True)
 47 |     plt.show()
 48 | 
 49 | 
 50 | def main(show_graph: bool):
 51 |     # Read in data
 52 |     try:
 53 |         df = pd.read_csv(marketplace_dataframe_file_path)
 54 |     except FileNotFoundError as f:
 55 |         print("Original Error:", f)
 56 |         print("You need to run 'auto_setup' first.")
 57 |         exit()
 58 | 
 59 |     # Quick data examination
 60 |     pd.set_option("display.max_columns", None)  # Show all columns
 61 |     pd.set_option("display.width", None)  # No max width for display
 62 |     pd.set_option("display.max_colwidth", None)  # No limit on column width
 63 |     print(df.head())
 64 |     print(df.shape)
 65 |     print(df.isnull().sum())
 66 |     print(df.columns)
 67 |     print(df.describe())
 68 |     print(f"Class distribution before sampling:\n{df['has_sold'].value_counts()}")
 69 | 
 70 |     # Separate the "has_sold" dataframe
 71 |     df_sold = df[df["has_sold"] == 1]
 72 |     df_not_sold = df[df["has_sold"] == 0]
 73 | 
 74 |     # Split the not_sold into bad sellers and can’t tell sellers
 75 |     oldest_sold_item_by_percentile = df_sold["days_listed"].quantile(0.90)
 76 |     df_sold = df_sold[df_sold["days_listed"] < oldest_sold_item_by_percentile]
 77 |     df_bad_sellers = df_not_sold[df_not_sold["days_listed"] > oldest_sold_item_by_percentile]
 78 |     # df_undetermined_sellers = df_not_sold[df_not_sold["days_listed"] < oldest_sold_item_by_percentile]
 79 | 
 80 |     # See synthetic data shapes
 81 |     print("Sold df shape:", df_sold.shape)
 82 |     print("Bad sellers df shape:", df_bad_sellers.shape)
 83 | 
 84 |     # Combine the relevant data for class balancing
 85 |     df_combined = pd.concat([df_bad_sellers, df_sold])
 86 | 
 87 |     # Dynamically determine majority and minority classes
 88 |     class_counts = df_combined['has_sold'].value_counts()
 89 |     majority_class = class_counts.idxmax()
 90 |     minority_class = class_counts.idxmin()
 91 | 
 92 |     # Separate the majority and minority classes
 93 |     df_majority = df_combined[df_combined["has_sold"] == majority_class]
 94 |     df_minority = df_combined[df_combined["has_sold"] == minority_class]
 95 | 
 96 |     # Upsample minority class
 97 |     df_minority_upsampled = resample(df_minority,
 98 |                                      replace=True,  # Sample with replacement
 99 |                                      n_samples=len(df_majority),  # Match number of majority class
100 |                                      random_state=42)  # Reproducible results
101 | 
102 |     # Combine upsampled minority class with the majority class
103 |     df_balanced = pd.concat([df_minority_upsampled, df_majority])
104 | 
105 |     # Shuffle the balanced DataFrame
106 |     df = df_balanced.sample(frac=1, random_state=42).reset_index(drop=True)
107 |     print(f"Class distribution after balancing:\n{df['has_sold'].value_counts()}")
108 | 
109 |     # Prepare your data
110 |     features = ["weapon_url_name",
111 |                 "re_rolled",
112 |                 "positive1", "positive2", "positive3", "negative",
113 |                 "listing_price"]
114 |     target = "has_sold"
115 |     X = df[features]
116 |     y = df[target].astype(int)
117 | 
118 |     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
119 | 
120 |     # Preprocessing data
121 |     preprocessor = Preprocessor()
122 |     X_train_preprocessed = preprocessor.fit_transform(X_train)
123 |     X_test_preprocessed = preprocessor.transform(X_test)
124 | 
125 |     model = get_model_architecture()
126 |     model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
127 |     model.summary()
128 | 
129 |     # Define early stopping callback
130 |     early_stopping = tf.keras.callbacks.EarlyStopping(monitor="val_loss",
131 |                                                       patience=10,
132 |                                                       min_delta=0.001,
133 |                                                       restore_best_weights=True)
134 | 
135 |     # Train the model
136 |     model.fit(
137 |         X_train_preprocessed,
138 |         y_train,
139 |         epochs=100,
140 |         validation_split=0.2,
141 |         batch_size=128,
142 |         callbacks=[early_stopping],
143 |         verbose=1
144 |     )
145 | 
146 |     if show_graph:
147 |         y_test_pred = model.predict(X_test_preprocessed)
148 |         plot_classification_performance(y_test, y_test_pred)
149 | 
150 |     preprocessor = Preprocessor()
151 |     X_preprocessed = preprocessor.fit_transform(X)
152 | 
153 |     # Rebuild and recompile the model
154 |     model = get_model_architecture()
155 |     model.compile(optimizer="adam", loss="binary_crossentropy")
156 | 
157 |     # Instantiate the custom callback with the target loss
158 |     best_epoch = early_stopping.best_epoch
159 | 
160 |     # Retrain the model on the full dataset
161 |     model.fit(
162 |         X_preprocessed,
163 |         y,
164 |         epochs=best_epoch,
165 |         batch_size=128,
166 |         verbose=1
167 |     )
168 | 
169 |     # Save model.
170 |     model.save(liquidity_model_model_file_path)
171 |     preprocessor.save(liquidity_model_preprocessor_file_path)
172 | 
173 | 
174 | def _save_preprocessor():
175 |     # Read in data
176 |     try:
177 |         df = pd.read_csv(marketplace_dataframe_file_path)
178 |     except FileNotFoundError as f:
179 |         print("Original Error:", f)
180 |         print("You need to run 'auto_setup' first.")
181 |         exit()
182 | 
183 |     # Prepare your data
184 |     features = ["weapon_url_name",
185 |                 "positive1", "positive2", "positive3", "negative",
186 |                 "listing_price"]
187 | 
188 |     X = df[features]
189 |     preprocessor = Preprocessor()
190 |     preprocessor.fit_transform(X)
191 |     preprocessor.save(liquidity_model_preprocessor_file_path)
192 | 
193 | 
194 | if __name__ == "__main__":
195 |     # _save_preprocessor()
196 |     main(True)
197 | 


--------------------------------------------------------------------------------
/training/trainers/train_price_model.py:
--------------------------------------------------------------------------------
  1 | import warnings
  2 | 
  3 | import matplotlib.pyplot as plt
  4 | import numpy as np
  5 | import pandas as pd
  6 | import seaborn as sns
  7 | from matplotlib.ticker import ScalarFormatter
  8 | from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
  9 | from sklearn.model_selection import train_test_split
 10 | from tensorflow.keras.callbacks import EarlyStopping
 11 | 
 12 | from warframe_marketplace_predictor.filepaths import *
 13 | from warframe_marketplace_predictor.training.preprocessors.price_model_preprocessor import Preprocessor, get_model_architecture
 14 | 
 15 | warnings.filterwarnings("ignore", category=RuntimeWarning)
 16 | warnings.filterwarnings("ignore", category=FutureWarning)
 17 | 
 18 | 
 19 | def plot_performance(y_test, y_test_pred, history=None):
 20 |     """
 21 |     Plots performance metrics and a scatter plot of Actual vs Predicted values with improved scaling and aesthetics using Seaborn.
 22 |     Additionally, plots training and validation loss over epochs if history is provided.
 23 | 
 24 |     Parameters:
 25 |     - y_test: array-like of true target values.
 26 |     - y_test_pred: array-like of predicted target values.
 27 |     - history: Keras History object containing training history (optional).
 28 |     """
 29 |     # Calculate performance metrics
 30 |     r2 = r2_score(y_test, y_test_pred)
 31 |     mse = mean_squared_error(y_test, y_test_pred)
 32 |     mae = mean_absolute_error(y_test, y_test_pred)
 33 | 
 34 |     # Print the metrics
 35 |     print(f"R² Score: {r2:.4f}")
 36 |     print(f"Mean Squared Error (MSE): {mse:.4f}")
 37 |     print(f"Mean Absolute Error (MAE): {mae:.4f}")
 38 | 
 39 |     # Transform the data
 40 |     y_test_transformed = np.expm1(y_test)
 41 |     y_pred_transformed = np.expm1(y_test_pred)
 42 | 
 43 |     # Flatten the arrays to ensure they are 1D
 44 |     y_test_flat = y_test_transformed.ravel()
 45 |     y_pred_flat = y_pred_transformed.ravel()
 46 | 
 47 |     # Initialize the Seaborn style
 48 |     sns.set(style="whitegrid", palette="pastel", font_scale=1.2)
 49 | 
 50 |     # Create subplots: 1 row, 2 columns
 51 |     fig, axes = plt.subplots(1, 2, figsize=(16, 8))
 52 | 
 53 |     # --- Left Plot: Actual vs Predicted Scatter Plot ---
 54 |     ax1 = axes[0]
 55 | 
 56 |     sns.scatterplot(
 57 |         x=y_test_flat,
 58 |         y=y_pred_flat,
 59 |         color="skyblue",
 60 |         edgecolor="w",
 61 |         s=60,
 62 |         alpha=0.6,
 63 |         label="Predicted vs Actual",
 64 |         ax=ax1
 65 |     )
 66 | 
 67 |     # Plot the Perfect Prediction line (y = x)
 68 |     min_val = min(y_test_flat.min(), y_pred_flat.min())
 69 |     max_val = max(y_test_flat.max(), y_pred_flat.max())
 70 | 
 71 |     ax1.plot(
 72 |         [min_val, max_val],
 73 |         [min_val, max_val],
 74 |         color="red",
 75 |         linestyle="--",
 76 |         linewidth=2,
 77 |         label="Perfect Prediction"
 78 |     )
 79 | 
 80 |     # Sort the data for a smooth best fit line
 81 |     sorted_indices = np.argsort(y_test_flat)
 82 |     y_test_sorted = y_test_flat[sorted_indices]
 83 |     y_pred_sorted = y_pred_flat[sorted_indices]
 84 | 
 85 |     # Plot the Best Fit line using Seaborn"s regplot without scatter
 86 |     sns.regplot(
 87 |         x=y_test_sorted,
 88 |         y=y_pred_sorted,
 89 |         scatter=False,
 90 |         color="darkblue",
 91 |         line_kws={"linewidth": 2, "label": "Best Fit"},
 92 |         ax=ax1
 93 |     )
 94 | 
 95 |     # Customize the first plot
 96 |     ax1.set_title("Actual vs Predicted Values", fontsize=16, weight="bold")
 97 |     ax1.set_xlabel("Actual Values", fontsize=14)
 98 |     ax1.set_ylabel("Predicted Values", fontsize=14)
 99 | 
100 |     # Set log scales
101 |     ax1.set_xscale("log")
102 |     ax1.set_yscale("log")
103 | 
104 |     # Format the tick labels to avoid scientific notation
105 |     formatter = ScalarFormatter()
106 |     formatter.set_scientific(False)
107 |     ax1.xaxis.set_major_formatter(formatter)
108 |     ax1.yaxis.set_major_formatter(formatter)
109 | 
110 |     # Handle the legend to avoid duplicate labels
111 |     handles, labels = ax1.get_legend_handles_labels()
112 |     by_label = dict(zip(labels, handles))
113 |     ax1.legend(by_label.values(), by_label.keys(), loc="upper left", fontsize=12)
114 | 
115 |     # --- Right Plot: Training vs Validation Loss ---
116 |     if history is not None:
117 |         ax2 = axes[1]
118 | 
119 |         # Extract loss and validation loss from history
120 |         loss = history.history.get("loss")
121 |         val_loss = history.history.get("val_loss")
122 |         epochs = range(1, len(loss) + 1)
123 | 
124 |         # Plot training loss
125 |         sns.lineplot(
126 |             x=epochs,
127 |             y=loss,
128 |             label="Training Loss",
129 |             ax=ax2,
130 |             color="blue",
131 |             linewidth=2
132 |         )
133 | 
134 |         # Plot validation loss if available
135 |         if val_loss:
136 |             sns.lineplot(
137 |                 x=epochs,
138 |                 y=val_loss,
139 |                 label="Validation Loss",
140 |                 ax=ax2,
141 |                 color="orange",
142 |                 linewidth=2
143 |             )
144 | 
145 |         ax2.set_title("Training vs Validation Loss", fontsize=16, weight="bold")
146 |         ax2.set_xlabel("Epoch", fontsize=14)
147 |         ax2.set_ylabel("Loss", fontsize=14)
148 |         ax2.legend(loc="upper right", fontsize=12)
149 | 
150 |     plt.tight_layout()
151 |     plt.show()
152 | 
153 | 
154 | def train_model(show_graph: bool):
155 |     # Read in data
156 |     try:
157 |         df = pd.read_csv(marketplace_dataframe_file_path)
158 |     except FileNotFoundError as f:
159 |         print("Original Error:", f)
160 |         print("You need to run 'auto_setup' first.")
161 |         exit()
162 | 
163 |     # Quick data examination
164 |     pd.set_option("display.max_columns", None)  # Show all columns
165 |     pd.set_option("display.width", None)  # No max width for display
166 |     pd.set_option("display.max_colwidth", None)  # No limit on column width
167 |     print(df.head())
168 |     print(df.shape)
169 |     print(df.isnull().sum())
170 |     print(df.columns)
171 |     print(df.describe())
172 | 
173 |     # Prepare your data
174 |     features = ["weapon_url_name",
175 |                 "re_rolled",
176 |                 "positive1", "positive2", "positive3", "negative"]
177 |     target = "listing_price"
178 |     X = df[features]
179 |     y = df[target]
180 | 
181 |     # Visual inspection of y's values justify this decision -- Trust me bro
182 |     y_log = np.log1p(y)
183 | 
184 |     X_train, X_test, y_train_log, y_test_log = train_test_split(X, y_log, test_size=0.2, random_state=42)
185 | 
186 |     # Preprocessing data
187 |     preprocessor = Preprocessor()
188 |     X_train_preprocessed = preprocessor.fit_transform(X_train)
189 |     X_test_preprocessed = preprocessor.transform(X_test)
190 |     for v in X_test_preprocessed:
191 |         print(v)
192 |         print()
193 | 
194 |     # Compile the model
195 |     model = get_model_architecture()
196 |     model.compile(optimizer="adam", loss="logcosh")
197 |     model.summary()
198 | 
199 |     # Define callbacks
200 |     early_stopping = EarlyStopping(
201 |         monitor="val_loss",
202 |         patience=10,
203 |         min_delta=0.0001,
204 |         restore_best_weights=True
205 |     )
206 | 
207 |     callbacks = [early_stopping]
208 | 
209 |     # Train the model
210 |     history = model.fit(
211 |         X_train_preprocessed,
212 |         y_train_log,
213 |         epochs=100,
214 |         validation_split=0.2,
215 |         batch_size=256,
216 |         callbacks=callbacks,
217 |         verbose=1
218 |     )
219 | 
220 |     if show_graph:
221 |         y_test_log_pred = model.predict(X_test_preprocessed)
222 |         plot_performance(y_test_log, y_test_log_pred, history)
223 | 
224 |     # Preprocess the entire dataset
225 |     preprocessor_final = Preprocessor()
226 |     X_preprocessed_full = preprocessor_final.fit_transform(X)
227 | 
228 |     # Rebuild and recompile the model
229 |     final_model = get_model_architecture()
230 |     final_model.compile(optimizer="adam", loss="logcosh")
231 | 
232 |     # Retrain the model on the full dataset
233 |     final_model.fit(
234 |         X_preprocessed_full,
235 |         y_log,
236 |         epochs=early_stopping.best_epoch,
237 |         batch_size=256,
238 |         verbose=1
239 |     )
240 | 
241 |     # Save the final model and preprocessor
242 |     final_model.save(price_model_model_file_path)
243 |     preprocessor_final.save(price_model_preprocessor_file_path)
244 | 
245 |     print("Training complete. Model and preprocessor saved.")
246 | 
247 | 
248 | def _manual_save_preprocessor():
249 |     # Read in data
250 |     try:
251 |         df = pd.read_csv(marketplace_dataframe_file_path)
252 |     except FileNotFoundError as f:
253 |         print("Original Error:", f)
254 |         print("You need to run 'auto_setup' first.")
255 |         exit()
256 | 
257 |     # Prepare your data
258 |     features = ["weapon_url_name",  "positive1", "positive2", "positive3", "negative"]
259 | 
260 |     X = df[features]
261 |     preprocessor = Preprocessor()
262 |     preprocessor.fit_transform(X)
263 |     preprocessor.save(price_model_preprocessor_file_path)
264 | 
265 | 
266 | if __name__ == "__main__":
267 |     # _manual_save_preprocessor()
268 |     train_model(show_graph=True)
269 | 


--------------------------------------------------------------------------------
/training/preprocessors/price_model_preprocessor.py:
--------------------------------------------------------------------------------
  1 | import pickle
  2 | import warnings
  3 | from typing import List
  4 | 
  5 | import numpy as np
  6 | import pandas as pd
  7 | import tensorflow as tf
  8 | from sklearn.base import BaseEstimator, TransformerMixin
  9 | from tensorflow.keras import layers, Model
 10 | 
 11 | from warframe_marketplace_predictor.filepaths import *
 12 | from warframe_marketplace_predictor.shtuff.data_handler import DataHandler
 13 | 
 14 | # Suppress specific FutureWarning related to .fillna downcasting
 15 | warnings.filterwarnings("ignore", message=".*Downcasting object dtype arrays.*")
 16 | 
 17 | 
 18 | class Preprocessor(BaseEstimator, TransformerMixin):
 19 |     def __init__(self):
 20 |         # Scalers for numerical features
 21 |         # self.disposition_scaler = MinMaxScaler()
 22 |         # self.avg_trade_price_scaler = StandardScaler()
 23 | 
 24 |         # Placeholder for most common disposition and mean log price
 25 |         # self.disposition_most_common = None
 26 |         # self.avg_trade_price_log_mean = None
 27 | 
 28 |         # Vocabulary for attributes and groups
 29 |         # group_names = DataHandler().get_groups()
 30 | 
 31 |         # OneHotEncoders for "group" and attributes
 32 |         # self.group_encoder = OneHotEncoder(
 33 |         #     categories=[group_names],
 34 |         #     handle_unknown="error",
 35 |         #     sparse_output=False
 36 |         # )
 37 |         pass
 38 | 
 39 |     def fit(self, X: pd.DataFrame, y=None) -> "Preprocessor":
 40 |         # # Compute the most common disposition
 41 |         # self.disposition_most_common = X["disposition"].mode()[0]
 42 |         #
 43 |         # # Fill missing values in avg_trade_price with the mean
 44 |         # self.avg_trade_price_mean = X["avg_trade_price"].mean()
 45 |         # X_avg_trade_price_filled = X["avg_trade_price"].fillna(self.avg_trade_price_mean)
 46 |         #
 47 |         # # Logarithmic transformation
 48 |         # X_avg_trade_price_log = np.log1p(X_avg_trade_price_filled)
 49 |         # self.avg_trade_price_log_mean = X_avg_trade_price_log.mean()
 50 |         #
 51 |         # # Fit scalars
 52 |         # X_disposition_filled = X["disposition"].fillna(self.disposition_most_common).to_frame()
 53 |         # self.disposition_scaler.fit(X_disposition_filled)
 54 |         # self.avg_trade_price_scaler.fit(X_avg_trade_price_log.to_frame())
 55 |         #
 56 |         # # Fit the OneHotEncoders
 57 |         # self.group_encoder.fit(X[["group"]])
 58 | 
 59 |         return self
 60 | 
 61 |     def transform(self, X: pd.DataFrame) -> List[pd.DataFrame]:
 62 |         X_copy = X.copy()
 63 | 
 64 |         # # Convert boolean columns to integers
 65 |         # X_copy["has_incarnon"] = X_copy["has_incarnon"].astype(int)
 66 |         # X_copy["re_rolled"] = X_copy["re_rolled"].astype(int)
 67 |         #
 68 |         # # Handle missing values and scale "disposition"
 69 |         # X_copy["disposition"] = X_copy["disposition"].fillna(self.disposition_most_common)
 70 |         # X_copy["disposition"] = self.disposition_scaler.transform(X_copy[["disposition"]])
 71 |         #
 72 |         # # Fill missing values in avg_trade_price with the mean from fit
 73 |         # X_copy["avg_trade_price"] = X_copy["avg_trade_price"].fillna(self.avg_trade_price_mean)
 74 |         #
 75 |         # # Apply logarithmic transformation
 76 |         # X_copy["avg_trade_price"] = np.log1p(X_copy["avg_trade_price"])
 77 |         #
 78 |         # # Scale avg_trade_price
 79 |         # X_copy["avg_trade_price"] = self.avg_trade_price_scaler.transform(X_copy[["avg_trade_price"]])
 80 |         #
 81 |         # # One-hot encode "group"
 82 |         # group_encoded = self.group_encoder.transform(X_copy[["group"]])
 83 |         # group_encoded_df = pd.DataFrame(
 84 |         #     group_encoded,
 85 |         #     columns=self.group_encoder.get_feature_names_out(["group"]),
 86 |         #     index=X_copy.index
 87 |         # )
 88 |         #
 89 |         # # Drop original categorical columns
 90 |         # X_copy = X_copy.drop(columns=["group"])
 91 |         #
 92 |         # # Concatenate the one-hot encoded columns
 93 |         # X_copy = pd.concat([X_copy, group_encoded_df], axis=1)
 94 | 
 95 |         X_copy["re_rolled"] = X_copy["re_rolled"].astype(np.float32)
 96 | 
 97 |         X_copy = X_copy.fillna("<NONE>")
 98 | 
 99 |         return self.split_X(X_copy)
100 | 
101 |     @staticmethod
102 |     def split_X(X: pd.DataFrame) -> List[pd.DataFrame]:
103 |         # Assuming the one-hot encoded group columns start with "group_"
104 |         # group_columns = [col for col in X.columns if col.startswith("group_")]
105 | 
106 |         return [
107 |             X[["weapon_url_name"]],  # weapon_url_name_input
108 |             # X[group_columns],  # group_one_hot_encoded
109 |             # X[["has_incarnon"]],  # has_incarnon_input
110 |             # X[["avg_trade_price"]],  # avg_trade_price_input
111 |             X[["re_rolled"]],  # re_rolled_input
112 |             # X[["disposition"]],  # disposition_input
113 |             X[["positive1", "positive2", "positive3", "negative"]]  # attribute_names_input
114 |         ]
115 | 
116 |     def save(self, filepath: str = None):
117 |         filepath = filepath if filepath else price_model_preprocessor_file_path
118 |         # Save the preprocessor instance to a pickle file
119 |         with open(filepath, "wb") as f:
120 |             pickle.dump(self, f)
121 | 
122 |     @staticmethod
123 |     def load(filepath: str = None) -> "Preprocessor":
124 |         filepath = filepath if filepath else price_model_preprocessor_file_path
125 |         # Load the preprocessor instance from a pickle file
126 |         with open(filepath, "rb") as f:
127 |             return pickle.load(f)
128 | 
129 | 
130 | def get_model_architecture():
131 |     # def get_positional_encoding(sequence_length, embedding_dim):
132 |     #     # Create a matrix of positions [0, 1, 2, ..., sequence_length-1]
133 |     #     position = tf.range(sequence_length, dtype=tf.float32)[:, tf.newaxis]
134 |     #
135 |     #     # Compute the division term correctly
136 |     #     div_term = tf.exp(tf.multiply(tf.range(0, embedding_dim, 2, dtype=tf.float32),
137 |     #                                   -tf.divide(tf.math.log(10000.0), embedding_dim)))
138 |     #
139 |     #     # Calculate angle_rads as position * div_term
140 |     #     angle_rads = tf.multiply(position, div_term)
141 |     #
142 |     #     # Assign sine to even indices (2i) and cosine to odd indices (2i+1)
143 |     #     pos_encoding = tf.concat([tf.sin(angle_rads), tf.cos(angle_rads)], axis=-1)
144 |     #
145 |     #     # If embedding_dim is odd, truncate the pos_encoding to the required dimensions
146 |     #     pos_encoding = pos_encoding[:, :embedding_dim]
147 |     #
148 |     #     return pos_encoding
149 | 
150 |     # Structure sizes
151 |     weapon_name_embedding_size = 32
152 |     attributes_embedding_size = 32
153 | 
154 |     # Load vocabularies
155 |     data_handler = DataHandler()
156 |     weapon_url_names = data_handler.get_url_names()
157 |     # group_names = data_handler.get_groups()
158 |     # group_input_size = len(group_names)
159 |     attributes = data_handler.get_attribute_names()
160 | 
161 |     # -- Inputs --
162 |     weapon_url_name_input = layers.Input(shape=(1,), dtype=tf.string, name="weapon_url_name_input")
163 |     # group_input = layers.Input(shape=(group_input_size,), dtype=tf.float32, name="group_input")
164 |     # has_incarnon_input = layers.Input(shape=(1,), dtype=tf.float32, name="has_incarnon_input")
165 |     # avg_trade_price_input = layers.Input(shape=(1,), dtype=tf.float32, name="avg_trade_price_input")
166 |     re_rolled_input = layers.Input(shape=(1,), dtype=tf.float32, name="re_rolled_input")
167 |     # disposition_input = layers.Input(shape=(1,), dtype=tf.float32, name="disposition_input")
168 |     attributes_input = layers.Input(shape=(4,), dtype=tf.string, name="attributes_input")
169 | 
170 |     # -- Weapon Path --
171 |     # String Lookups
172 |     weapon_url_name_lookup = layers.StringLookup(
173 |         vocabulary=weapon_url_names,
174 |         mask_token="<NONE>",
175 |         name="weapon_url_name_lookup"
176 |     )
177 | 
178 |     # Convert string inputs to integer indices
179 |     weapon_url_name_indices = weapon_url_name_lookup(weapon_url_name_input)
180 | 
181 |     # Embedding layers for Weapon Data
182 |     weapon_url_name_embedding_layer = layers.Embedding(
183 |         input_dim=len(weapon_url_names) + 1,  # +1 for mask token
184 |         output_dim=weapon_name_embedding_size,
185 |         name="weapon_url_name_embedding"
186 |     )
187 | 
188 |     # Generate embeddings and flatten
189 |     weapon_url_name_embedding_output = layers.Flatten()(weapon_url_name_embedding_layer(weapon_url_name_indices))
190 | 
191 |     # -- Attributes Path --
192 |     # String Lookups
193 |     attributes_lookup = layers.StringLookup(
194 |         vocabulary=attributes,
195 |         mask_token="<NONE>",
196 |         name="attributes_lookup"
197 |     )
198 | 
199 |     # Convert string inputs to integer indices
200 |     attributes_indices = attributes_lookup(attributes_input)
201 | 
202 |     # Embedding layers for Weapon Data
203 |     attributes_embedding_layer = layers.Embedding(
204 |         input_dim=len(attributes) + 1,  # +1 for mask token
205 |         output_dim=attributes_embedding_size,
206 |         name="attributes_embedding"
207 |     )
208 | 
209 |     # Generate embeddings for attribute indices
210 |     attributes_embedding_output = attributes_embedding_layer(attributes_indices)
211 |     flattened_attributes_embedding = layers.Flatten()(attributes_embedding_output)
212 | 
213 |     # Positional Encoding
214 |     # sequence_length = 4  # Fixed shape of (4,) for attributes input
215 | 
216 |     # # Generate positional encoding tensor and add to embeddings
217 |     # positional_encoding_matrix = get_positional_encoding(sequence_length, attributes_embedding_size)
218 |     # attributes_embeddings = tf.add(attributes_embedding_output, positional_encoding_matrix)
219 |     #
220 |     # # Attributes Self Attention Layer
221 |     # attributes_attention = layers.Attention()(
222 |     #     [attributes_embeddings, attributes_embeddings]
223 |     # )
224 |     # attributes_attention = layers.Dense(units=32, activation="relu")(attributes_attention)
225 |     # attributes_attention = layers.Flatten()(attributes_attention)
226 | 
227 |     # -- Concatenate with Other Features --
228 |     combined_embedding = layers.Concatenate(name="combined_embedding")(
229 |         # [weapon_url_name_embedding_output, group_input, has_incarnon_input, avg_trade_price_input,
230 |         #  re_rolled_input,
231 |         #  disposition_input, flattened_attributes_embedding]
232 |         [weapon_url_name_embedding_output, re_rolled_input, flattened_attributes_embedding]
233 |     )
234 | 
235 |     # -- Dense Layers for Final Prediction --
236 |     x = combined_embedding
237 |     x = layers.Dense(units=128, activation="relu")(x)
238 |     x = layers.Dense(units=32, activation="relu")(x)
239 | 
240 |     output = layers.Dense(units=1, activation="linear", name="output")(x)
241 | 
242 |     # Define the model with all inputs and the output
243 |     # model = Model(inputs=[
244 |     #     weapon_url_name_input, group_input, has_incarnon_input, avg_trade_price_input,
245 |     #     re_rolled_input,
246 |     #     disposition_input, attributes_input
247 |     # ], outputs=output, name="riven_model")
248 |     model = Model(inputs=[weapon_url_name_input, re_rolled_input, attributes_input], outputs=output, name="riven_model")
249 | 
250 |     return model
251 | 


--------------------------------------------------------------------------------
/shtuff/riven_funcs.py:
--------------------------------------------------------------------------------
  1 | import itertools
  2 | from functools import lru_cache
  3 | from typing import List, Dict, Any, Tuple, Union
  4 | 
  5 | import numpy as np
  6 | import pandas as pd
  7 | from prettytable import PrettyTable
  8 | 
  9 | from warframe_marketplace_predictor.shtuff.data_handler import DataHandler
 10 | from warframe_marketplace_predictor.shtuff.make_prediction import PricePredictor
 11 | 
 12 | 
 13 | def calculate_kuva_cost(re_rolls: int) -> int:
 14 |     return {
 15 |         0: 900,
 16 |         1: 1000,
 17 |         2: 1200,
 18 |         3: 1400,
 19 |         4: 1700,
 20 |         5: 2000,
 21 |         6: 2350,
 22 |         7: 2750,
 23 |         8: 3150,
 24 |         9: 3500
 25 |     }.get(re_rolls, 3500)
 26 | 
 27 | 
 28 | def calculate_expected_price_on_reroll(weapon_prices_pdf, listing_price):
 29 |     # Reconstruct the prices and probabilities
 30 |     weapon_prices = np.array(list(weapon_prices_pdf.keys()), dtype=np.float32)
 31 |     weapon_prices_pdf_values = np.array(list(weapon_prices_pdf.values()), dtype=np.float32)
 32 |     # Sort the prices and corresponding pdf values
 33 |     sorted_indices = np.argsort(weapon_prices)
 34 |     weapon_prices = weapon_prices[sorted_indices]
 35 |     weapon_prices_pdf_values = weapon_prices_pdf_values[sorted_indices]
 36 |     weapon_prices_pdf_values /= np.sum(weapon_prices_pdf_values)
 37 | 
 38 |     # Compute cumulative distribution function (CDF)
 39 |     prices_cdf = np.cumsum(weapon_prices_pdf_values)
 40 |     prices_cdf /= prices_cdf[-1]  # Normalize to make sure it sums to 1
 41 | 
 42 |     # Find the position of the listing price in the weapon's price distribution
 43 |     price_position = np.searchsorted(weapon_prices, listing_price, side="right")
 44 | 
 45 |     if price_position >= len(weapon_prices) - 1:
 46 |         probability_stagnant_roll = 1.0
 47 |         expected_improved_listing_price = listing_price
 48 |     else:
 49 |         probability_stagnant_roll = prices_cdf[price_position]
 50 |         improved_prices = weapon_prices[price_position + 1:]
 51 |         improved_prices_pdf = weapon_prices_pdf_values[price_position + 1:]
 52 |         improved_prices_pdf /= np.sum(improved_prices_pdf)
 53 |         expected_improved_listing_price = np.dot(improved_prices, improved_prices_pdf)
 54 | 
 55 |     expected_price_per_reroll = ((probability_stagnant_roll * listing_price)
 56 |                                  + (1 - probability_stagnant_roll) * expected_improved_listing_price)
 57 | 
 58 |     return expected_price_per_reroll, probability_stagnant_roll
 59 | 
 60 | 
 61 | def get_possible_rivens(item_name: str, re_rolled: bool, attributes: List[str] = None,
 62 |                         use_official_attributes: bool = False, order_matters: bool = True,
 63 |                         df_format: bool = False) -> Union[List[Dict[str, Any]], pd.DataFrame]:
 64 |     """
 65 |     Generate all possible rivens for a given item based on its attributes.
 66 |     Utilizes caching to avoid recomputing rivens for previously seen attribute sets.
 67 | 
 68 |     :param item_name: The name of the item.
 69 |     :param re_rolled: Boolean indicating if the riven has been re-rolled.
 70 |     :param attributes: Optional list of attributes. If None, attributes are fetched based on the item.
 71 |     :param use_official_attributes: Use the official attribute names instead of the training/url names.
 72 |     :param order_matters: Used to determine whether to get permutations (for analysis) or combinations (for estimates).
 73 |     :param df_format: Optionally return it in a dataframe format.
 74 |     :return: A list of dictionaries representing possible rivens.
 75 |     """
 76 |     if attributes is None:
 77 |         data_handler = DataHandler()
 78 |         attributes = data_handler.get_weapon_specific_attributes(item_name)
 79 | 
 80 |     if use_official_attributes:
 81 |         data_handler = DataHandler()
 82 |         attributes = [data_handler.get_official_attribute_name(x) for x in attributes]
 83 | 
 84 |     # Sort and convert attributes to a tuple to ensure consistency and hashability
 85 |     attributes_sorted = tuple(sorted(attributes))
 86 | 
 87 |     # Retrieve cached rivens based on attributes
 88 |     base_rivens = _compute_rivens_cached(attributes_sorted, order_matters)
 89 | 
 90 |     # Add 'name' and 're_rolled' to each riven
 91 |     rivens = [
 92 |         {
 93 |             **riven,
 94 |             "name": item_name,
 95 |             "re_rolled": re_rolled
 96 |         }
 97 |         for riven in base_rivens
 98 |     ]
 99 | 
100 |     if df_format:
101 |         for r in rivens:
102 |             for i, p in enumerate(r["positives"], start=1):
103 |                 r[f"positive{i}"] = p
104 |             del r["positives"]
105 |             r["negative"] = r["negatives"][0] if r["negatives"] else None
106 |             del r["negatives"]
107 |         return pd.DataFrame(rivens)
108 | 
109 |     return rivens
110 | 
111 | 
112 | @lru_cache(maxsize=64)
113 | def _compute_rivens_cached(attributes: Tuple[str, ...], order_matters: bool = True) -> List[Dict[str, Any]]:
114 |     """
115 |     Compute all possible rivens based on a sorted tuple of attributes.
116 |     This function is cached to optimize performance for repeated attribute sets.
117 | 
118 |     :param attributes: A sorted tuple of attribute strings.
119 |     :param order_matters: Used to determine whether to get permutations (for analysis) or combinations (for estimates).
120 |     :return: A list of dictionaries representing possible rivens without item-specific details.
121 |     """
122 |     rivens = []
123 | 
124 |     elementals = {"heat_damage", "cold_damage", "electric_damage", "toxin_damage",
125 |                   "Heat", "Cold", "Electricity", "Toxin"}
126 | 
127 |     # Define possible counts of positive and negative attributes
128 |     combinations = [(2, 0), (2, 1), (3, 0), (3, 1)]
129 | 
130 |     for positive_count, negative_count in combinations:
131 |         total_attributes = positive_count + negative_count
132 | 
133 |         # Generate all unique combinations without considering order
134 |         attribute_groups = itertools.permutations(attributes, r=total_attributes)
135 |         for attribute_group in attribute_groups:
136 |             positives = attribute_group[:positive_count]
137 |             negatives = attribute_group[positive_count:] if negative_count > 0 else tuple()
138 | 
139 |             # Skip if any elemental attribute is negative
140 |             if any(attr in elementals for attr in negatives):
141 |                 continue
142 | 
143 |             riven = {
144 |                 "positives": positives,
145 |                 "negatives": negatives
146 |             }
147 |             rivens.append(riven)
148 | 
149 |     # Filter out duplicates
150 |     riven_ids = dict()
151 |     if not order_matters:
152 |         for riven in rivens:
153 |             riven_id = "p".join(sorted(riven["positives"])) + "n".join(sorted(riven["negatives"]))
154 |             if riven_id in riven_ids:
155 |                 continue
156 |             riven_ids[riven_id] = riven
157 | 
158 |     if order_matters:
159 |         return rivens
160 |     else:
161 |         return list(riven_ids.values())
162 | 
163 | 
164 | def generate_table(rivens: List[Dict[str, Any]]) -> None:
165 |     data_handler = DataHandler()
166 |     rivens.sort(key=lambda x: x["expected_profit_per_kuva"], reverse=True)
167 |     kuva_scale = 1000
168 | 
169 |     thingy = {
170 |         "name": ("Name", "Weapon Name", lambda x: x),
171 | 
172 |         "weapon_ranking": ("Rank", "Weapon Ranking", lambda x: x),
173 |         "weapon_percentile": ("WP", "Weapon Ranking Percentile", lambda x: f"{100 * x:.2f}"),
174 |         "global_percentile": ("GP", "Global Ranking Percentile", lambda x: f"{x:.2f}"),
175 | 
176 |         "listing_price": ("List", "Predicted Listing Price", lambda x: f"{x:.0f}"),
177 |         "average_list_price": ("AvgList", "Weapon Average Listing Price", lambda x: f"{x:.0f}"),
178 |         "expected_price_on_reroll": ("EList", "Expected Listing Price on Reroll", lambda x: f"{x:.0f}"),
179 |         "expected_profit_on_reroll": ("EProf", "Expected Listing Profit on Reroll", lambda x: f"{x:.0f}"),
180 |         "expected_profit_per_kuva": (f"EProf{kuva_scale}K", f"Expected Listing Profit per {kuva_scale} Kuva",
181 |                                      lambda x: f"{kuva_scale * x:.2f}"),
182 | 
183 |         "positives": ("Pos", "Positives", lambda x: ", ".join(map(data_handler.get_official_attribute_name, x))),
184 |         "negatives": ("Neg", "Negatives", lambda x: ", ".join(map(data_handler.get_official_attribute_name, x))),
185 |         "re_rolls": ("Rerolls", "Number of Rerolls", lambda x: x),
186 |     }
187 | 
188 |     keys_order = list(rivens[0].keys())
189 | 
190 |     key_table = PrettyTable()
191 |     key_table.field_names = ["Key", "Meaning"]
192 |     for key in keys_order:
193 |         key_table.add_row([thingy[key][0], thingy[key][1]])
194 | 
195 |     print(key_table)
196 | 
197 |     table = PrettyTable()
198 |     table.field_names = [thingy[key][0] for key in keys_order]
199 |     for riven in rivens:
200 |         table.add_row([thingy[key][-1](riven[key]) for key in riven])
201 | 
202 |     print(table)
203 | 
204 | 
205 | def analyze_rivens(rivens: List[Dict[str, Any]]) -> None:
206 |     data_handler = DataHandler()
207 |     price_predictor = PricePredictor()
208 | 
209 |     # Check and validate riven attributes
210 |     if not all(map(price_predictor.is_valid, rivens)):
211 |         return
212 | 
213 |     processed_rivens = []
214 | 
215 |     # Predict the listing prices
216 |     listing_prices = price_predictor.predict(rivens, verbose=True)
217 |     for riven, listing_price in zip(rivens, listing_prices):
218 |         # Get weapon-related info from precomputed data
219 |         weapon_name = riven["name"]
220 |         rank_data = data_handler.get_weapon_ranking_information(weapon_name)
221 |         rank = rank_data["rank"]
222 |         expected_value = rank_data["expected_value"]
223 |         weapon_ranking = f"{rank}"
224 | 
225 |         # Get the price distribution for the weapon
226 |         weapon_prices_pdf = rank_data["price_distribution"]
227 |         expected_price_on_reroll, probability_stagnant_roll = (
228 |             calculate_expected_price_on_reroll(weapon_prices_pdf, listing_price))
229 | 
230 |         expected_profit_on_reroll = expected_price_on_reroll - listing_price
231 | 
232 |         re_rolls = riven["re_rolls"]
233 |         kuva_cost = calculate_kuva_cost(re_rolls)
234 |         expected_profit_per_kuva = expected_profit_on_reroll / kuva_cost
235 | 
236 |         # Get global price percentile
237 |         global_percentile = data_handler.get_global_price_percentile(listing_price)
238 | 
239 |         # Get correctly named positives and negatives
240 |         positives = [data_handler.get_proper_attribute_name(x) for x in riven["positives"]]
241 |         negatives = [data_handler.get_proper_attribute_name(x) for x in riven["negatives"]]
242 | 
243 |         processed_riven = {
244 |             "name": weapon_name,
245 | 
246 |             "weapon_ranking": weapon_ranking,
247 |             "weapon_percentile": probability_stagnant_roll,
248 |             "global_percentile": global_percentile,
249 | 
250 |             "listing_price": listing_price,
251 |             "average_list_price": expected_value,
252 |             "expected_price_on_reroll": expected_price_on_reroll,
253 |             "expected_profit_on_reroll": expected_profit_on_reroll,
254 |             "expected_profit_per_kuva": expected_profit_per_kuva,
255 | 
256 |             "positives": positives,
257 |             "negatives": negatives,
258 |             "re_rolls": re_rolls,
259 |         }
260 | 
261 |         processed_rivens.append(processed_riven)
262 | 
263 |     generate_table(processed_rivens)
264 | 
265 | 


--------------------------------------------------------------------------------
/training/preprocessors/liquidity_model_preprocessor.py:
--------------------------------------------------------------------------------
  1 | import pickle
  2 | import warnings
  3 | from typing import List
  4 | 
  5 | import numpy as np
  6 | import pandas as pd
  7 | import tensorflow as tf
  8 | from sklearn.base import BaseEstimator, TransformerMixin
  9 | from sklearn.linear_model import LinearRegression
 10 | from sklearn.preprocessing import MinMaxScaler
 11 | from tensorflow.keras import layers, Model
 12 | 
 13 | from warframe_marketplace_predictor.shtuff.data_handler import DataHandler
 14 | 
 15 | # Suppress specific FutureWarning related to .fillna downcasting
 16 | warnings.filterwarnings("ignore", message=".*Downcasting object dtype arrays.*")
 17 | 
 18 | 
 19 | class Preprocessor(BaseEstimator, TransformerMixin):
 20 |     def __init__(self):
 21 |         # Scalers for continuous variables
 22 |         # self.disposition_scaler = MinMaxScaler()
 23 |         # self.avg_trade_price_scaler = MinMaxScaler()
 24 |         # self.listing_scaler = MinMaxScaler()
 25 |         #
 26 |         # # Mean value for disposition (used for imputation)
 27 |         # self.disposition_most_common = None
 28 |         #
 29 |         # # Regression model for imputing avg_trade_price
 30 |         # self.avg_trade_price_model = None
 31 |         #
 32 |         # # Mapping of weapon_url_name to known avg_trade_price
 33 |         # self.known_avg_trade_prices = {}
 34 |         pass
 35 | 
 36 |     def fit(self, X: pd.DataFrame, y=None) -> 'Preprocessor':
 37 |         # Store mean value for disposition
 38 |         # self.disposition_most_common = X["disposition"].mode()[0]
 39 |         #
 40 |         # # Prepare data for regression model
 41 |         # known_avg_trade = X.dropna(subset=["avg_trade_price"])
 42 |         # self._store_known_avg_trade_prices(known_avg_trade)
 43 |         # model_data = self._prepare_regression_data(known_avg_trade)
 44 |         # self._train_regression_model(model_data)
 45 |         #
 46 |         # # Fit scalers
 47 |         # self._fit_scalers(X, known_avg_trade)
 48 | 
 49 |         return self
 50 | 
 51 |     def transform(self, X: pd.DataFrame) -> List[pd.DataFrame]:
 52 |         X_copy = X.copy()
 53 | 
 54 |         # # Convert boolean columns to integers
 55 |         # self._convert_boolean_columns(X_copy)
 56 |         #
 57 |         # # Handle disposition
 58 |         # self._transform_disposition(X_copy)
 59 |         #
 60 |         # # Transform listing
 61 |         # self._transform_listing(X_copy)
 62 |         #
 63 |         # # Impute and transform avg_trade_price
 64 |         # self._impute_and_transform_avg_trade_price(X_copy)
 65 | 
 66 |         X_copy["re_rolled"] = X_copy["re_rolled"].astype(np.float32)
 67 |         X_copy["listing_price"] = np.log1p(X_copy["listing_price"])
 68 | 
 69 |         # Fill remaining missing values with "<NONE>"
 70 |         X_copy = X_copy.fillna("<NONE>")
 71 | 
 72 |         return self.split_X(X_copy)
 73 | 
 74 |     # def _store_known_avg_trade_prices(self, known_avg_trade: pd.DataFrame):
 75 |     #     """Store known avg_trade_price values for each weapon_url_name."""
 76 |     #     self.known_avg_trade_prices = known_avg_trade.groupby("weapon_url_name")["avg_trade_price"].first().to_dict()
 77 |     #
 78 |     # def _prepare_regression_data(self, known_avg_trade: pd.DataFrame) -> pd.DataFrame:
 79 |     #     """Prepare the dataset for training the regression model."""
 80 |     #     # Aggregate listing features
 81 |     #     listing_agg = known_avg_trade.groupby("weapon_url_name").agg({
 82 |     #         "listing": ["mean", "median", "std", "min", "max"]
 83 |     #     })
 84 |     #     listing_agg.columns = ['listing_' + stat for stat in ['mean', 'median', 'std', 'min', 'max']]
 85 |     #
 86 |     #     # Target variable: avg_trade_price per weapon
 87 |     #     avg_trade_price_per_weapon = known_avg_trade.groupby("weapon_url_name")["avg_trade_price"].first()
 88 |     #
 89 |     #     # Merge features and target
 90 |     #     model_data = listing_agg.join(avg_trade_price_per_weapon)
 91 |     #
 92 |     #     return model_data
 93 |     #
 94 |     # def _train_regression_model(self, model_data: pd.DataFrame):
 95 |     #     """Train the regression model for imputing avg_trade_price."""
 96 |     #     X_model = model_data.drop(columns=["avg_trade_price"])
 97 |     #     y_model = model_data["avg_trade_price"]
 98 |     #
 99 |     #     self.avg_trade_price_model = LinearRegression()
100 |     #     self.avg_trade_price_model.fit(X_model, y_model)
101 |     #
102 |     # def _fit_scalers(self, X: pd.DataFrame, known_avg_trade: pd.DataFrame):
103 |     #     """Fit scalers for disposition, avg_trade_price, and listing."""
104 |     #     # Disposition scaler
105 |     #     X_disposition = X["disposition"].fillna(self.disposition_most_common).to_frame()
106 |     #     self.disposition_scaler.fit(X_disposition)
107 |     #
108 |     #     # avg_trade_price scaler (after log transformation)
109 |     #     avg_trade_price_log = np.log1p(known_avg_trade["avg_trade_price"]).to_frame()
110 |     #     self.avg_trade_price_scaler.fit(avg_trade_price_log)
111 |     #
112 |     #     # listing scaler (after log transformation)
113 |     #     listing_log = np.log1p(X["listing"]).to_frame()
114 |     #     self.listing_scaler.fit(listing_log)
115 |     #
116 |     # def _convert_boolean_columns(self, X: pd.DataFrame):
117 |     #     """Convert boolean columns to integers."""
118 |     #     X["has_incarnon"] = X["has_incarnon"].astype(int)
119 |     #     X["re_rolled"] = X["re_rolled"].astype(int)
120 |     #
121 |     # def _transform_disposition(self, X: pd.DataFrame):
122 |     #     """Impute and scale disposition values."""
123 |     #     X["disposition"] = X["disposition"].fillna(self.disposition_most_common)
124 |     #     X["disposition"] = self.disposition_scaler.transform(X[["disposition"]])
125 |     #
126 |     # def _transform_listing(self, X: pd.DataFrame):
127 |     #     """Log-transform and scale listing."""
128 |     #     X["listing"] = np.log1p(X["listing"])
129 |     #     X["listing"] = self.listing_scaler.transform(X[["listing"]])
130 |     #
131 |     # def _impute_and_transform_avg_trade_price(self, X: pd.DataFrame):
132 |     #     """Impute missing avg_trade_price and apply transformations."""
133 |     #     self._impute_avg_trade_price(X)
134 |     #     X["avg_trade_price"] = np.log1p(X["avg_trade_price"])
135 |     #     X["avg_trade_price"] = self.avg_trade_price_scaler.transform(X[["avg_trade_price"]])
136 |     #
137 |     # def _impute_avg_trade_price(self, X: pd.DataFrame):
138 |     #     """Impute missing avg_trade_price values using the regression model."""
139 |     #     # Map known avg_trade_price values
140 |     #     X["avg_trade_price"] = X["weapon_url_name"].map(self.known_avg_trade_prices)
141 |     #
142 |     #     # Identify weapons with missing avg_trade_price
143 |     #     missing_weapons = X[X["avg_trade_price"].isna()]["weapon_url_name"].unique()
144 |     #
145 |     #     for weapon in missing_weapons:
146 |     #         weapon_mask = X["weapon_url_name"] == weapon
147 |     #         weapon_data = X.loc[weapon_mask]
148 |     #
149 |     #         # Aggregate listing features for this weapon
150 |     #         features_df = self._aggregate_listing_features(weapon_data)
151 |     #
152 |     #         # Predict avg_trade_price
153 |     #         predicted_avg_trade_price = self.avg_trade_price_model.predict(features_df)[0]
154 |     #
155 |     #         # Assign predicted value
156 |     #         X.loc[weapon_mask, "avg_trade_price"] = predicted_avg_trade_price
157 |     #
158 |     # def _aggregate_listing_features(self, weapon_data: pd.DataFrame) -> pd.DataFrame:
159 |     #     """Aggregate listing features for a weapon."""
160 |     #     listings = weapon_data["listing"].values
161 |     #     features = {
162 |     #         'listing_mean': listings.mean(),
163 |     #         'listing_median': np.median(listings),
164 |     #         'listing_std': listings.std(ddof=0),
165 |     #         'listing_min': listings.min(),
166 |     #         'listing_max': listings.max()
167 |     #     }
168 |     #     features_df = pd.DataFrame([features])
169 |     #
170 |     #     # Handle any missing values in features
171 |     #     features_df = features_df.fillna(0)
172 |     #
173 |     #     return features_df
174 | 
175 |     @staticmethod
176 |     def split_X(X: pd.DataFrame) -> List[pd.DataFrame]:
177 |         """Split the DataFrame into components."""
178 |         return [
179 |             X[["weapon_url_name"]],
180 |             # X[["group"]],
181 |             # X[["has_incarnon"]],
182 |             # X[["avg_trade_price"]],
183 |             X[["re_rolled"]],
184 |             # X[["disposition"]],
185 |             X[["positive1", "positive2", "positive3", "negative"]],
186 |             X[["listing_price"]]
187 |         ]
188 | 
189 |     def save(self, filepath: str):
190 |         """Save the preprocessor instance to a pickle file."""
191 |         with open(filepath, "wb") as f:
192 |             pickle.dump(self, f)
193 | 
194 |     @staticmethod
195 |     def load(filepath: str) -> 'Preprocessor':
196 |         """Load the preprocessor instance from a pickle file."""
197 |         with open(filepath, "rb") as f:
198 |             return pickle.load(f)
199 | 
200 | 
201 | def get_model_architecture():
202 |     weapon_name_embedding_size = 32
203 |     attributes_embedding_size = 32
204 | 
205 |     # Load vocabularies
206 |     data_handler = DataHandler()
207 |     weapon_url_names = data_handler.get_url_names()
208 |     # group_names = data_handler.get_groups()
209 |     attributes = data_handler.get_attribute_names()
210 | 
211 |     # Inputs
212 |     weapon_url_name_input = layers.Input(shape=(1,), dtype=tf.string, name="weapon_url_name_input")
213 |     # group_input = layers.Input(shape=(1,), dtype=tf.string, name="group_input")
214 |     # has_incarnon_input = layers.Input(shape=(1,), dtype=tf.float32, name="has_incarnon_input")
215 |     # avg_trade_price_input = layers.Input(shape=(1,), dtype=tf.float32, name="avg_trade_price_input")
216 |     re_rolled_input = layers.Input(shape=(1,), dtype=tf.float32, name="re_rolled_input")
217 |     # disposition_input = layers.Input(shape=(1,), dtype=tf.float32, name="disposition_input")
218 |     attributes_input = layers.Input(shape=(4,), dtype=tf.string, name="attributes_input")
219 |     listing_input = layers.Input(shape=(1,), dtype=tf.float32, name="listing_input")
220 | 
221 |     # Lookups
222 |     weapon_url_name_lookup = layers.StringLookup(
223 |         vocabulary=weapon_url_names,
224 |         mask_token="<NONE>",
225 |         name="weapon_url_name_lookup"
226 |     )
227 |     # group_lookup = layers.StringLookup(
228 |     #     vocabulary=group_names,
229 |     #     mask_token="<NONE>",
230 |     #     name="group_lookup"
231 |     # )
232 |     attributes_lookup = layers.StringLookup(
233 |         vocabulary=attributes,
234 |         mask_token="<NONE>",
235 |         name="attributes_lookup"
236 |     )
237 | 
238 |     weapon_url_name_indices = weapon_url_name_lookup(weapon_url_name_input)
239 |     # group_indices = group_lookup(group_input)
240 |     attributes_indices = attributes_lookup(attributes_input)
241 | 
242 |     # Embeddings
243 |     weapon_url_name_embedding_layer = layers.Embedding(
244 |         input_dim=len(weapon_url_names) + 1,
245 |         output_dim=weapon_name_embedding_size,
246 |         name="weapon_url_name_embedding"
247 |     )
248 |     attributes_embedding_layer = layers.Embedding(
249 |         input_dim=len(attributes) + 1,
250 |         output_dim=attributes_embedding_size,
251 |         name="attributes_embedding"
252 |     )
253 | 
254 |     weapon_url_name_embedding_output = layers.Flatten()(weapon_url_name_embedding_layer(weapon_url_name_indices))
255 |     attributes_embedding_output = layers.Flatten()(attributes_embedding_layer(attributes_indices))
256 | 
257 |     # Combine Weapon and Attribute Embeddings
258 |     combined_embedding = layers.Concatenate(name="combined_embedding")(
259 |         [weapon_url_name_embedding_output, attributes_embedding_output, listing_input, ]
260 |     )
261 | 
262 |     # -- Dense Layers for Final Prediction --
263 |     x = combined_embedding
264 |     x = layers.Dense(units=128, activation="relu")(x)
265 |     x = layers.Dense(units=32, activation="relu")(x)
266 | 
267 |     output = layers.Dense(units=1, activation="sigmoid", name="output")(x)
268 | 
269 |     # Define the model with all inputs and the output
270 |     model = Model(inputs=[
271 |         weapon_url_name_input, re_rolled_input, attributes_input, listing_input
272 |     ], outputs=output, name="riven_model")
273 | 
274 |     return model
275 | 


--------------------------------------------------------------------------------
/tool_setup_and_maintenance/setup_weapon_information.py:
--------------------------------------------------------------------------------
  1 | import collections
  2 | import gc
  3 | 
  4 | import numpy as np
  5 | import tensorflow as tf
  6 | import tqdm
  7 | from sklearn.cluster import MiniBatchKMeans
  8 | 
  9 | from warframe_marketplace_predictor.filepaths import *
 10 | from warframe_marketplace_predictor.shtuff.data_handler import DataHandler
 11 | from warframe_marketplace_predictor.shtuff.make_prediction import PricePredictor
 12 | from warframe_marketplace_predictor.shtuff.riven_funcs import get_possible_rivens
 13 | from warframe_marketplace_predictor.shtuff.storage_handling import save_json
 14 | 
 15 | os.environ["LOKY_MAX_CPU_COUNT"] = "4"
 16 | 
 17 | gpus = tf.config.list_physical_devices("GPU")
 18 | if gpus:
 19 |     try:
 20 |         for gpu in gpus:
 21 |             tf.config.experimental.set_memory_growth(gpu, True)
 22 |         logical_gpus = tf.config.list_logical_devices("GPU")
 23 |         print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
 24 |     except RuntimeError as e:
 25 |         print(e)
 26 | 
 27 | 
 28 | def compute_prices_frequency_distribution(riven_attribute_combo_types):
 29 |     """Calculate the normalized price distribution (prices_pdf) for the rivens."""
 30 | 
 31 |     attribute_combo_frequencies = collections.Counter(riven_attribute_combo_types)
 32 |     target_count = max(attribute_combo_frequencies.values())
 33 |     prices_frequencies = np.array([
 34 |         target_count / attribute_combo_frequencies[trait_combo_type]
 35 |         for trait_combo_type in riven_attribute_combo_types
 36 |     ])
 37 |     total_riven_amount = np.sum(prices_frequencies)
 38 |     prices_pdf = prices_frequencies / total_riven_amount
 39 |     return prices_pdf, prices_frequencies
 40 | 
 41 | 
 42 | def compute_attribute_importance(
 43 |         rivens, prices, price_frequencies, combo_types
 44 | ):
 45 |     """
 46 |     Compute the impact of each trait on the price and visualize it.
 47 | 
 48 |     Parameters:
 49 |     - rivens: List of dictionaries, each with 'positives' and 'negatives' traits.
 50 |     - prices: List of prices corresponding to each riven.
 51 |     - price_frequencies: List indicating the frequency or weight of each price.
 52 |     - combo_types: List indicating the combo type for each riven.
 53 | 
 54 |     Returns:
 55 |     - attribute_importance: Dictionary with normalized importance scores for positives and negatives.
 56 |     """
 57 |     attribute_importance = {
 58 |         "positives": collections.defaultdict(float),
 59 |         "negatives": collections.defaultdict(float)
 60 |     }
 61 | 
 62 |     # Initialize aggregation dictionary for each combo type
 63 |     combo_trait_impacts = {
 64 |         combo_type: {
 65 |             "positives": collections.defaultdict(float),
 66 |             "negatives": collections.defaultdict(float)
 67 |         }
 68 |         for combo_type in ["p2n0", "p2n1", "p3n0", "p3n1"]
 69 |     }
 70 | 
 71 |     # Aggregate the sum of prices within each trait category per combo type
 72 |     for riven, price, frequency, combo_type in zip(rivens, prices, price_frequencies, combo_types):
 73 |         for trait_type in ["positives", "negatives"]:
 74 |             for trait in riven[trait_type]:
 75 |                 combo_trait_impacts[combo_type][trait_type][trait] += price * frequency
 76 | 
 77 |     # Normalize category by the maximum
 78 |     for trait_impacts in combo_trait_impacts.values():
 79 |         for trait_type in ["positives", "negatives"]:
 80 |             traits = trait_impacts[trait_type]
 81 |             if traits:
 82 |                 min_value = min(traits.values())
 83 |                 max_value = max(traits.values())
 84 |                 if max_value > min_value:  # Avoid division by zero
 85 |                     trait_impacts[trait_type] = {
 86 |                         trait: value / max_value
 87 |                         for trait, value in traits.items()
 88 |                     }
 89 |                 else:
 90 |                     trait_impacts[trait_type] = {trait: 0 for trait in traits}  # All values are the same
 91 | 
 92 |     # Combine each category by an equal weighting
 93 |     num_combo_types = len(combo_trait_impacts)
 94 |     weight = 1.0 / num_combo_types if num_combo_types > 0 else 0
 95 |     for trait_impacts in combo_trait_impacts.values():
 96 |         for trait_type in ["positives", "negatives"]:
 97 |             for trait, normalized_value in trait_impacts[trait_type].items():
 98 |                 attribute_importance[trait_type][trait] += normalized_value * weight
 99 | 
100 |     # Final normalization of combined traits to the region (1, 0)
101 |     for trait_type in ["positives", "negatives"]:
102 |         if not attribute_importance[trait_type]:
103 |             continue
104 | 
105 |         min_combined = min(attribute_importance[trait_type].values())
106 |         max_combined = max(attribute_importance[trait_type].values())
107 |         if max_combined > min_combined:  # Avoid division by zero
108 |             attribute_importance[trait_type] = {
109 |                 trait: value / max_combined
110 |                 for trait, value in attribute_importance[trait_type].items()
111 |             }
112 |         else:
113 |             attribute_importance[trait_type] = {trait: 0 for trait in
114 |                                                 attribute_importance[trait_type]}  # All values are the same
115 | 
116 |         attribute_importance[trait_type] = {trait: attribute_importance[trait_type][trait]
117 |                                             for trait in sorted(attribute_importance[trait_type].keys(),
118 |                                                                 key=attribute_importance[trait_type].get,
119 |                                                                 reverse=True)}
120 | 
121 |     return attribute_importance
122 | 
123 | 
124 | def compute_sparse_prices_pdf_kmeans(prices, prices_frequencies, num_bins):
125 |     """
126 |     Compute a sparse representation of the price distribution using K-Means clustering.
127 | 
128 |     Parameters:
129 |     - prices (np.ndarray): Array of price values.
130 |     - prices_frequencies (np.ndarray): Corresponding frequencies of each price.
131 |     - num_bins (int): Desired number of bins.
132 | 
133 |     Returns:
134 |     - sparse_prices_distribution (dict): Mapping of expected price per bin to total frequency.
135 |     - bin_edges (np.ndarray): Edges of the bins.
136 |     """
137 |     # Reshape prices for clustering
138 |     prices_reshaped = prices.reshape(-1, 1)
139 | 
140 |     # Initialize KMeans with the desired number of bins
141 |     kmeans = MiniBatchKMeans(n_clusters=num_bins, batch_size=5120, random_state=42)
142 | 
143 |     # Fit KMeans with sample weights as frequencies
144 |     kmeans.fit(prices_reshaped, sample_weight=prices_frequencies)
145 | 
146 |     # Get cluster centers and sort them
147 |     cluster_centers = np.sort(kmeans.cluster_centers_.flatten())
148 | 
149 |     # Define bin edges as midpoints between cluster centers
150 |     bin_edges = np.concatenate([
151 |         [prices.min() - 1],  # Extend the first bin to include the minimum price
152 |         (cluster_centers[:-1] + cluster_centers[1:]) / 2,
153 |         [prices.max() + 1]  # Extend the last bin to include the maximum price
154 |     ])
155 | 
156 |     # Assign each price to a bin
157 |     bin_indices = np.digitize(prices, bin_edges) - 1  # bin_indices start from 0
158 | 
159 |     # Aggregate frequencies and compute expected price per bin
160 |     sparse_prices_distribution = {}
161 |     for bin_idx in range(num_bins):
162 |         in_bin = bin_indices == bin_idx
163 |         if not np.any(in_bin):
164 |             continue
165 |         total_frequency = np.sum(prices_frequencies[in_bin])
166 |         expected_price = np.average(prices[in_bin], weights=prices_frequencies[in_bin])
167 |         sparse_prices_distribution[expected_price] = total_frequency
168 | 
169 |     del kmeans
170 | 
171 |     return sparse_prices_distribution
172 | 
173 | 
174 | def compute_sparse_prices_pdf(prices, prices_frequencies, granularity):
175 |     """
176 |     Compute sparse representations of the price distribution using percentile-based and K-Means-based binning.
177 |     Plot both sparse distributions alongside the dense distribution for comparison.
178 | 
179 |     Parameters:
180 |     - prices (np.ndarray): Array of price values.
181 |     - prices_pdf (np.ndarray): Probability density function values for each price.
182 |     - prices_frequencies (np.ndarray): Corresponding frequencies of each price.
183 |     - granularity (int): Number of bins for percentile-based binning.
184 |     """
185 | 
186 |     # Sort the prices and corresponding data
187 |     sorted_indices = np.argsort(prices)
188 |     prices_sorted = prices[sorted_indices]
189 |     prices_frequencies_sorted = prices_frequencies[sorted_indices]
190 | 
191 |     sparse_prices_distribution_kmeans = compute_sparse_prices_pdf_kmeans(
192 |         prices_sorted, prices_frequencies_sorted, num_bins=granularity)
193 | 
194 |     return sparse_prices_distribution_kmeans
195 | 
196 | 
197 | def create_weapon_information():
198 |     price_predictor = PricePredictor(model_predict_batch_size=4096)
199 |     data_handler = DataHandler()
200 |     weapon_names = data_handler.get_url_names()
201 |     weapon_ranking_information = []
202 |     all_prices = []
203 |     all_prices_frequencies = []
204 | 
205 |     pbar = tqdm.tqdm(weapon_names, desc="Determining characteristics", unit="weapon category")
206 |     for weapon_name in pbar:
207 |         pbar.set_postfix(weapon=weapon_name)
208 | 
209 |         possible_rivens = get_possible_rivens(weapon_name, re_rolled=True)
210 |         if len(possible_rivens) == 0:
211 |             # Handle weapons with no possible rivens
212 |             print(f"No rivens found for weapon: {weapon_name}. Skipping.")
213 |             continue
214 | 
215 |         prices = price_predictor.predict(possible_rivens, verbose=False, skip_validation=True)
216 | 
217 |         # Get attribute combo types
218 |         riven_attribute_combo_types = [
219 |             f"p{len(riven['positives'])}n{len(riven['negatives'])}" for riven in possible_rivens
220 |         ]
221 | 
222 |         # Calculate the price distribution using the weighted data
223 |         prices_pdf, prices_frequencies = compute_prices_frequency_distribution(riven_attribute_combo_types)
224 | 
225 |         # Calculate the expected value (EV)
226 |         expected_price = np.dot(prices_pdf, prices)
227 | 
228 |         # Compute attribute importance
229 |         attribute_importance = compute_attribute_importance(possible_rivens, prices, prices_frequencies,
230 |                                                             riven_attribute_combo_types)
231 | 
232 |         # Collect all prices and prices_pdf for global processing
233 |         all_prices.extend(prices.tolist())
234 |         all_prices_frequencies.extend(prices_frequencies.tolist())
235 | 
236 |         # Compute sparse prices_pdf for the weapon
237 |         weapon_prices_pdf_sparse = compute_sparse_prices_pdf(prices, prices_frequencies, granularity=2000)
238 | 
239 |         weapon_ranking_information.append((weapon_name, expected_price,
240 |                                            attribute_importance, weapon_prices_pdf_sparse))
241 | 
242 |         # Clear memory
243 |         del possible_rivens, prices
244 |         gc.collect()  # Godly
245 | 
246 |         # Clear the Keras/TensorFlow session
247 |         tf.keras.backend.clear_session()
248 | 
249 |     # Sort the rankings by expected value
250 |     weapon_ranking_information.sort(key=lambda x: x[1], reverse=True)
251 |     weapon_ranking_information = {
252 |         weapon_name: {
253 |             "rank": i,
254 |             "expected_value": expected_value,
255 |             "attribute_importance": attribute_importance,
256 |             "price_distribution": price_distribution
257 |         }
258 |         for i, (weapon_name, expected_value, attribute_importance, price_distribution) in
259 |         enumerate(weapon_ranking_information, start=1)
260 |     }
261 | 
262 |     # Save the results to JSON files
263 |     save_json(weapon_ranking_information_file_path, weapon_ranking_information)
264 | 
265 |     # After processing all weapons, compute global price bins using sparse representation
266 |     all_prices = np.array(all_prices)
267 |     all_prices_frequencies = np.array(all_prices_frequencies)
268 |     global_price_freq = compute_sparse_prices_pdf(all_prices, all_prices_frequencies, granularity=1000)
269 | 
270 |     # Save the results to JSON files
271 |     save_json(global_price_freq_file_path, global_price_freq)
272 | 
273 |     print("Finished evaluating weapons.")
274 | 
275 | 
276 | def main():
277 |     create_weapon_information()  # ~20 min
278 | 
279 | 
280 | if __name__ == "__main__":
281 |     main()
282 | 


--------------------------------------------------------------------------------
/shtuff/WIP_liquidity_gradient.py:
--------------------------------------------------------------------------------
  1 | from typing import Dict, Any, Union, Iterable
  2 | 
  3 | import matplotlib.pyplot as plt
  4 | import numpy as np
  5 | import pandas as pd
  6 | import tensorflow as tf
  7 | from tqdm import tqdm
  8 | from scipy.ndimage import gaussian_filter1d
  9 | from warframe_marketplace_predictor.filepaths import *
 10 | from warframe_marketplace_predictor.shtuff.data_handler import DataHandler
 11 | from warframe_marketplace_predictor.training.preprocessors.liquidity_model_preprocessor import Preprocessor
 12 | 
 13 | 
 14 | class PricePredictor:
 15 |     def __init__(self, model_predict_batch_size: int = 256):
 16 |         """
 17 |         Initializes the PricePredictor class, loading the preprocessor, model,
 18 |         attribute name shortcuts, and item name to URL mapping only once.
 19 |         """
 20 |         self.model_predict_batch_size = model_predict_batch_size
 21 |         self.data_handler = DataHandler()
 22 | 
 23 |         # Load the preprocessor and model once
 24 |         self.preprocessor = Preprocessor().load(liquidity_model_preprocessor_file_path)
 25 |         self.model: tf.keras.Model = tf.keras.models.load_model(liquidity_model_model_file_path)
 26 |         self._mask_token = "<NONE>"
 27 | 
 28 |         self.price_range_ = np.linspace(0, 10_000, 1, endpoint=True, dtype=np.float32)
 29 | 
 30 |     def is_valid(self, item: Dict[str, Any]) -> bool:
 31 |         """
 32 |         Checks if the provided attribute names (positives + negatives) are valid by comparing them against the
 33 |         shortcuts.
 34 | 
 35 |         Args:
 36 |             item (Dict[str, Any]): A dictionary containing the item's "positives" and "negatives" attributes.
 37 | 
 38 |         Returns:
 39 |             bool: True if all attribute names are valid, False otherwise.
 40 |         """
 41 |         if not self.data_handler.weapon_exists(item["name"]):
 42 |             print(f"{item['name']} is not a valid weapon name")
 43 |             print("Name suggestions:")
 44 |             print([k for k in sorted(self.data_handler.get_item_names())
 45 |                    if k and item["name"] and (k[0]).lower() == (item["name"][0]).lower()])
 46 |             return False
 47 | 
 48 |         if "re_rolls" in item:
 49 |             if not isinstance(item["re_rolls"], int):
 50 |                 print("'re_rolls' must be an integer.")
 51 |                 return False
 52 |             item["re_rolled"] = item["re_rolls"] > 0
 53 | 
 54 |         if "re_rolled" not in item or not isinstance(item["re_rolled"], bool):
 55 |             print("'re_rolled' is missing or incorrectly formatted.")
 56 |             return False
 57 | 
 58 |         # Combine the positives and negatives from the item to validate
 59 |         attribute_names = item["positives"] + item["negatives"]
 60 |         for attribute_name in attribute_names:
 61 |             if not self.data_handler.is_valid_attribute_shortcut(attribute_name):
 62 |                 print(f"{attribute_name} is not a valid attribute.")
 63 |                 print("Did you mean:")
 64 |                 print([k for k in sorted(self.data_handler.get_attribute_shortcuts())
 65 |                        if k and attribute_name and k[0] == attribute_name[0]])
 66 |                 return False
 67 | 
 68 |         return True
 69 | 
 70 |     def prepare(self, item: Dict[str, Any]) -> Dict[str, Any]:
 71 |         res = {
 72 |             "weapon_url_name": self.data_handler.get_url_name(item["name"]),
 73 |             "positive1": item["positives"][0] if len(item["positives"]) >= 1 else self._mask_token,
 74 |             "positive2": item["positives"][1] if len(item["positives"]) >= 2 else self._mask_token,
 75 |             "positive3": item["positives"][2] if len(item["positives"]) >= 3 else self._mask_token,
 76 |             "negative": item["negatives"][0] if len(item["negatives"]) >= 1 else self._mask_token,
 77 |             "re_rolled": item["re_rolled"],
 78 |             # "group": self.data_handler.get_weapon_group(self.data_handler.get_url_name(item["name"])),
 79 |             # "has_incarnon": self.data_handler.weapon_has_incarnon(self.data_handler.get_url_name(item["name"])),
 80 |             # "avg_trade_price": self.data_handler.get_average_trade_price(
 81 |             #     self.data_handler.get_url_name(item["name"]), rolled_status="rerolled"),
 82 |             # "disposition": self.data_handler.get_disposition(self.data_handler.get_url_name(item["name"])),
 83 |         }
 84 |         return res
 85 | 
 86 |     def get_prepared_data(self, data: Iterable[Dict[str, Any]], skip_validation: bool, verbose: bool) -> pd.DataFrame:
 87 |         prepared_data = []
 88 | 
 89 |         iterator = tqdm(data, desc="Preparing data", unit="riven") if verbose else data
 90 |         for item in iterator:
 91 | 
 92 |             # Check for invalid names or shortcuts
 93 |             if not skip_validation and not self.is_valid(item):
 94 |                 return pd.DataFrame()
 95 | 
 96 |             row = self.prepare(item)
 97 | 
 98 |             if not skip_validation:
 99 |                 row["positive1"] = self.data_handler.get_proper_attribute_name(row["positive1"])
100 |                 row["positive2"] = self.data_handler.get_proper_attribute_name(row["positive2"])
101 |                 row["positive3"] = self.data_handler.get_proper_attribute_name(row["positive3"])
102 |                 row["negative"] = self.data_handler.get_proper_attribute_name(row["negative"])
103 | 
104 |             for val in self.price_range_:
105 |                 row_c = row.copy()
106 |                 row_c["listing_price"] = val
107 |                 prepared_data.append(row_c)
108 | 
109 |         prepared_data = pd.DataFrame(prepared_data)
110 |         return prepared_data
111 | 
112 |     def predict(self,
113 |                 data: Union[Iterable[Dict[str, Any]], Dict[str, Any], pd.DataFrame],
114 |                 price_range: Iterable[float] = None,
115 |                 verbose: bool = True,
116 |                 skip_validation: bool = False, raw: bool = False) -> np.ndarray:
117 |         """
118 |         Predicts outcomes based on the provided input data using a pre-trained model.
119 |         """
120 | 
121 |         if isinstance(data, dict):
122 |             data = [data]
123 | 
124 |         if price_range is not None:
125 |             self.price_range_ = np.array(price_range, dtype=np.float32)
126 | 
127 |         if not raw:
128 |             data = self.get_prepared_data(data, skip_validation, verbose)
129 | 
130 |         model_ready_data = self.preprocessor.transform(data)
131 |         predictions = self.model.predict(model_ready_data,
132 |                                          batch_size=self.model_predict_batch_size,
133 |                                          verbose=verbose)
134 | 
135 |         if raw:
136 |             return predictions
137 |         else:
138 |             return predictions.reshape(-1, len(self.price_range_))
139 | 
140 | 
141 | def main():
142 |     # Example data
143 |     rivens = [
144 |         {
145 |             "name": "Latron",
146 |             "positives": ["ms", "sc", "cd"],
147 |             "negatives": ["zoom"],
148 |             "re_rolled": True
149 |         },
150 |         # {
151 |         #     "name": "Praedos",
152 |         #     "positives": ["cc", "dmg", "slash"],
153 |         #     "negatives": [""],
154 |         #     "re_rolled": True
155 |         # },
156 |         # {
157 |         #     "name": "Zenith",
158 |         #     "positives": ["ms", "cd", "cc"],
159 |         #     "negatives": [""],
160 |         #     "re_rolled": True
161 |         # },
162 |         # {
163 |         #     "name": "Cyngas",
164 |         #     "positives": ["corpus", "zoom", ""],
165 |         #     "negatives": ["cc"],
166 |         #     "re_rolled": True
167 |         # },
168 |         # {
169 |         #     "name": "Sydon",
170 |         #     "positives": ["cc", "cd", "speed"],
171 |         #     "negatives": ["punc"],
172 |         #     "re_rolled": True
173 |         # },
174 |         # {
175 |         #     "name": "Acceltra",
176 |         #     "positives": ["speed", "dmg", "cc"],
177 |         #     "negatives": [""],
178 |         #     "re_rolled": True
179 |         # },
180 |     ]
181 | 
182 |     predictor = PricePredictor()
183 |     predictions = predictor.predict(rivens, price_range=list(range(0, 10_001, 1)))
184 |     predictions = gaussian_filter1d(predictions, sigma=1)
185 | 
186 |     plt.figure(figsize=(16, 8))
187 | 
188 |     # Generate colors using colormap for better distinction
189 |     colors = plt.cm.tab10(np.linspace(0, 1, len(rivens), endpoint=True))
190 | 
191 |     lines = []
192 | 
193 |     # Plot the predictions with improved labels and line styles
194 |     for p, r, color in zip(predictions, rivens, colors):
195 |         label = f"{r['name']} | Positives: {', '.join(filter(None, r['positives']))}"
196 |         if r['negatives'][0] != "":
197 |             label += f" | Negative: {r['negatives'][0]}"
198 |         line, = plt.plot(predictor.price_range_, p, label=label, linewidth=2.5, color=color)
199 |         lines.append(line)
200 | 
201 |     # Format y-axis as percentages and adjust ticks
202 |     plt.gca().yaxis.set_major_formatter(plt.FuncFormatter(lambda y, _: f'{y * 100:.0f}%'))
203 |     plt.yticks(np.arange(0, 1.1, 0.1), fontweight="bold")
204 |     plt.ylim([-0.05, 1.05])
205 | 
206 |     # Set axis labels with increased font size and weight
207 |     plt.xlabel("Platinum Price", fontsize=14, weight='bold')
208 |     plt.ylabel("Confidence (%)", fontsize=14, weight='bold')
209 | 
210 |     # Define function to find the index where confidence drops below 50%
211 |     def find_rightmost_index(arr, threshold=0.5):
212 |         indices = np.where(arr >= threshold)[0]
213 |         return indices[-1] if len(indices) > 0 else 0
214 | 
215 |     # Adjust x-axis limits based on data
216 |     x_limit = max(predictor.price_range_[find_rightmost_index(p)] for p in predictions)
217 |     x_limit_padding = 0.2 * x_limit
218 |     plt.xlim([-x_limit_padding / 3, x_limit + x_limit_padding])
219 |     plt.xticks(np.arange(0, x_limit + x_limit_padding, 100), fontweight="bold")
220 | 
221 |     # Add horizontal line at 50% confidence and shaded regions
222 |     plt.axhline(0.5, color="black", linestyle="--", linewidth=1)
223 |     plt.fill_between([-20_000, 20_000], 0.5, 1.05, color="green", alpha=0.1)
224 |     plt.fill_between([-20_000, 20_000], -0.05, 0.5, color="red", alpha=0.1)
225 | 
226 |     # Enhance grid and legend
227 |     plt.grid(True, linestyle='--', alpha=0.7)
228 |     plt.legend(fontsize=16, loc='upper right', fancybox=True, framealpha=1, edgecolor='black', facecolor='white')
229 | 
230 |     # Set title with increased font size and weight
231 |     plt.title("Model Confidence in Possible Sale Prices (Item Liquidity Proof of Concept)", fontsize=20, weight='bold')
232 |     plt.tight_layout()
233 | 
234 |     # Offsets for annotations to avoid overlap
235 |     offsets = [(-50, -0.05), (50, 0.05), (-50, 0.05), (50, -0.05)]
236 | 
237 |     for i, (p, line) in enumerate(zip(predictions, lines)):
238 |         index_of_interest = find_rightmost_index(p)
239 | 
240 |         # Draw vertical line where confidence dips below 50%
241 |         plt.axvline(predictor.price_range_[index_of_interest], 0, p[index_of_interest], color="gray", linestyle="--",
242 |                     linewidth=1)
243 | 
244 |         x_value = predictor.price_range_[index_of_interest]
245 |         y_value = 0
246 |         plt.annotate(
247 |             f"50% at {x_value:.0f}p",
248 |             xy=(x_value, y_value),
249 |             xytext=(x_value - 30, y_value),
250 |             fontsize=11,
251 |             fontweight='bold',
252 |             color="white",
253 |             bbox=dict(boxstyle="round,pad=0.3", edgecolor='white', facecolor='black', alpha=0.7)
254 |         )
255 | 
256 |         # Calculate expected price and corresponding confidence
257 |         above_50s_indices = np.where(p >= np.float32(0.5))[0]
258 |         filtered_prices = predictor.price_range_[above_50s_indices]
259 |         filtered_probs = p[above_50s_indices]
260 | 
261 |         expected_price = np.dot(filtered_prices, filtered_probs) / np.sum(filtered_probs)
262 |         confidence_at_expected_price = np.interp(expected_price, predictor.price_range_, p)
263 | 
264 |         # Plot marker at expected price
265 |         plt.scatter(expected_price, confidence_at_expected_price, color=line.get_color(), marker='o', s=100, zorder=5)
266 | 
267 |         # Annotate expected price with adjusted positions
268 |         offset = offsets[i % len(offsets)]
269 |         # print(rivens[i]["name"], expected_price)
270 |         plt.annotate(
271 |             f"Expected Price: {expected_price:.0f}p",
272 |             xy=(expected_price, confidence_at_expected_price),
273 |             xytext=(expected_price + offset[0], confidence_at_expected_price + offset[1]),
274 |             arrowprops=dict(arrowstyle="->", color='gray'),
275 |             fontsize=11,
276 |             fontweight='bold',
277 |             color="white",
278 |             bbox=dict(boxstyle="round,pad=0.3", edgecolor='white', facecolor='black', alpha=0.7)
279 |         )
280 | 
281 |         b = sum(p[:index_of_interest]) / len(p[:index_of_interest]) * expected_price
282 |         c = np.interp(b, predictor.price_range_[:index_of_interest], p[:index_of_interest])
283 |         plt.annotate(
284 |             f"Weighted Price: {b:.0f}p",
285 |             xy=(np.average(p[:index_of_interest]) * expected_price, c),
286 |             xytext=(np.average(p[:index_of_interest]) * expected_price + offset[0], c + offset[1]),
287 |             arrowprops=dict(arrowstyle="->", color='gray'),
288 |             fontsize=11,
289 |             fontweight='bold',
290 |             color="white",
291 |             bbox=dict(boxstyle="round,pad=0.3", edgecolor='white', facecolor='black', alpha=0.7)
292 |         )
293 | 
294 |     # plt.savefig(fname="img.png")
295 |     plt.show()
296 | 
297 | 
298 | if __name__ == "__main__":
299 |     main()
300 | 


--------------------------------------------------------------------------------
/tool_setup_and_maintenance/create_marketplace_dataframe.py:
--------------------------------------------------------------------------------
  1 | import itertools
  2 | import random
  3 | 
  4 | import pandas as pd
  5 | import tqdm
  6 | 
  7 | from warframe_marketplace_predictor.filepaths import *
  8 | from warframe_marketplace_predictor.shtuff.WIP_bias_adjustor import adjust_values
  9 | from warframe_marketplace_predictor.shtuff.data_handler import DataHandler
 10 | from warframe_marketplace_predictor.shtuff.storage_handling import read_json
 11 | 
 12 | 
 13 | def create_df() -> None:
 14 |     """Creates a dataframe from raw marketplace data and saves it as a CSV."""
 15 |     df_rows = []
 16 | 
 17 |     # Load raw marketplace data
 18 |     marketplace_data = read_json(raw_marketplace_data_file_path)
 19 | 
 20 |     # Convert marketplace data into a pandas dataframe of listings
 21 |     pbar = tqdm.tqdm(marketplace_data, desc="Listings processed", unit="listing", total=len(marketplace_data))
 22 |     for listing in pbar:
 23 |         df_row = dict()
 24 | 
 25 |         # Extract listing details
 26 |         df_row["id"] = listing["id"]
 27 |         df_row["created"] = listing["created"]
 28 |         df_row["captured_date"] = listing.get("captured_date")
 29 | 
 30 |         item = listing["item"]
 31 |         df_row["weapon_url_name"] = item["weapon_url_name"]
 32 |         df_row["polarity"] = item["polarity"]
 33 |         df_row["mod_rank"] = item["mod_rank"]
 34 |         df_row["re_rolls"] = item["re_rolls"]
 35 |         df_row["re_rolled"] = item["re_rolls"] > 0
 36 |         df_row["master_level"] = item["mastery_level"]
 37 | 
 38 |         # Get riven attribute names and values
 39 |         attributes = item["attributes"]
 40 |         attribute_names = {"positive1": None, "positive2": None, "positive3": None, "negative": None}
 41 |         attribute_values = {"positive1_value": None, "positive2_value": None, "positive3_value": None,
 42 |                             "negative_value": None}
 43 |         i = 1
 44 |         for attribute in attributes:
 45 |             if attribute["positive"]:
 46 |                 attribute_names[f"positive{i}"] = attribute["url_name"]
 47 |                 attribute_values[f"positive{i}_value"] = attribute["value"]
 48 |                 i += 1
 49 |             else:
 50 |                 attribute_names["negative"] = attribute["url_name"]
 51 |                 attribute_values["negative_value"] = attribute["value"]
 52 |         df_row.update(attribute_names)
 53 |         df_row.update(attribute_values)
 54 | 
 55 |         # Get prices associated with the riven
 56 |         df_row["is_direct_sell"] = listing["is_direct_sell"]
 57 |         df_row["starting_price"] = listing["starting_price"]
 58 |         df_row["buyout_price"] = listing["buyout_price"]
 59 |         df_rows.append(df_row)
 60 | 
 61 |     # Save dataframe to CSV
 62 |     df = pd.DataFrame(df_rows)
 63 |     df.to_csv(marketplace_dataframe_file_path, index=False)
 64 | 
 65 |     print(f"Marketplace dataframe created.")
 66 |     print(f"Total (rows, cols): {df.shape}\n")
 67 | 
 68 | 
 69 | def add_days_listed_and_has_sold_columns():
 70 |     """
 71 |     Adds a 'days_listed' column to the marketplace dataframe, which is calculated as the difference in days
 72 |     between 'captured_date' and 'created'. Handles cases where 'captured_date' is None by setting 'days_since_capture'
 73 |     to NaN for those entries. Also adds a 'has_sold' column based on whether 'captured_date' is less than the most
 74 |     recent 'captured_date'.
 75 | 
 76 |     'captured_date' is of the format: datetime.date.today().isoformat()
 77 |     'created' is of the format: 2024-09-22T15:41:54.000+00:00
 78 |     """
 79 |     # Load the existing dataframe
 80 |     df = pd.read_csv(marketplace_dataframe_file_path)
 81 | 
 82 |     # Convert 'created' to datetime, ensuring to parse the timezone information
 83 |     df['created'] = pd.to_datetime(df['created'], format="%Y-%m-%dT%H:%M:%S.%f%z", errors='coerce')
 84 | 
 85 |     # Convert 'captured_date' to datetime and make it timezone-naive (remove tz info)
 86 |     df['captured_date'] = pd.to_datetime(df['captured_date'], format="%Y-%m-%d", errors='coerce')
 87 | 
 88 |     # Remove timezone information from 'created' to make both columns tz-naive
 89 |     df['created'] = df['created'].dt.tz_localize(None)
 90 | 
 91 |     # Calculate 'days_since_capture' only for rows with non-null 'captured_date'
 92 |     df['days_listed'] = (df['captured_date'] - df['created'].dt.floor('D')).dt.days
 93 | 
 94 |     # Find the most recent capture date
 95 |     most_recent_capture_date = df['captured_date'].max()
 96 | 
 97 |     # Add 'has_sold' column based on whether 'captured_date' is less than the most recent capture date
 98 |     df['has_sold'] = df['captured_date'] < most_recent_capture_date  # TODO: Improve from naive solution
 99 | 
100 |     # Save the updated dataframe
101 |     df.to_csv(marketplace_dataframe_file_path, index=False)
102 | 
103 |     print(f"Columns 'days_listed' and 'has_sold' added to the dataframe.")
104 |     print(f"Total (rows, cols): {df.shape}\n")
105 | 
106 | 
107 | def handle_prices() -> None:
108 |     """
109 |     Consolidates the starting and buyout prices into a single price.
110 | 
111 |     Filters out users who set unrealistic pricing, such as infinite maximum bids or excessively large spreads between starting and buyout prices.
112 |     The goal is to return a dataset of more reasonable and focused price estimates.
113 |     """
114 |     df = pd.read_csv(marketplace_dataframe_file_path)
115 |     original_size = df.shape[0]
116 |     df = df.dropna(subset=["buyout_price"])  # Drop rows with None buyout_price
117 |     df = df[(df["buyout_price"] >= 10) & (df["buyout_price"] <= 10_000)]  # Keep rows with 10 <= buyout_price <= 10,000
118 |     # df = df[df["buyout_price"] <= 5 * df["starting_price"]]
119 |     df = df[df["is_direct_sell"] == True]
120 |     df["listing_price"] = df["buyout_price"]  # Use buyout_price as the listing price
121 |     df.to_csv(marketplace_dataframe_file_path, index=False)
122 | 
123 |     print(f"Column 'listing_price' added to the dataframe.")
124 |     print(f"Dropped {original_size - df.shape[0]} invalidly priced rows.")
125 |     print(f"Total (rows, cols): {df.shape}\n")
126 | 
127 | 
128 | def _WIP_create_estimated_trade_price() -> None:
129 |     """
130 |     Attempts to shift the listed price distribution to more accurately reflect the traded price distribution.
131 | 
132 |     Note: This function is a work in progress and may not be fully functional.
133 |     """
134 |     # Read the data files
135 |     df = pd.read_csv(marketplace_dataframe_file_path)
136 |     developer_summary_statistics = read_json(developer_summary_stats_file_path)
137 | 
138 |     # Get unique weapon names
139 |     weapon_names = df["weapon_url_name"].unique()
140 | 
141 |     results = []
142 |     for weapon_name in weapon_names:
143 |         weapon_listings = df[df["weapon_url_name"] == weapon_name]
144 |         listing_prices = weapon_listings["listing_price"]
145 |         if weapon_name in developer_summary_statistics:
146 |             traded_summary_statistics = developer_summary_statistics[weapon_name]["combined_stats"]
147 |             estimated_trade_prices = adjust_values(listing_prices, traded_summary_statistics)
148 |             results.append((weapon_name, estimated_trade_prices))
149 |         else:
150 |             print("Warning:", weapon_name, "does not exist in developer summary statistics.")
151 |             results.append((weapon_name, listing_prices))
152 | 
153 |     # Update the dataframe with the estimated trade prices
154 |     for weapon_name, estimated_trade_prices in results:
155 |         df.loc[df["weapon_url_name"] == weapon_name, "estimated_trade_price"] = estimated_trade_prices
156 | 
157 |     # Save the updated dataframe
158 |     df.to_csv(marketplace_dataframe_file_path, index=False)
159 |     print("Estimated trade price added via the traded summary statistics.")
160 | 
161 | 
162 | def add_supplementary_weapon_information() -> None:
163 |     """
164 |     Adds supplementary information to the marketplace weapon data CSV file.
165 | 
166 |     This function reads weapon data from a CSV, adds additional columns for:
167 |     - Weapon Group: Categorizes the weapon.
168 |     - Disposition: A value representing the weapon's disposition.
169 |     - Incarnon Status: Boolean indicating if the weapon has Incarnon capabilities.
170 |     - Average Trade Price: The average trade price of the weapon considering its reroll status.
171 | 
172 |     It then saves the updated data back to the CSV file.
173 |     """
174 |     # Load the CSV data
175 |     df = pd.read_csv(marketplace_dataframe_file_path)
176 | 
177 |     # Instantiate DataHandler for accessing supplementary data
178 |     data_handler = DataHandler()
179 | 
180 |     # Apply tqdm to the iteration
181 |     tqdm.tqdm.pandas(desc="Adding supplementary data", unit="listing")
182 | 
183 |     # Define a function to process each row
184 |     def process_row(row):
185 |         x = row["weapon_url_name"]
186 |         rerolled = "rerolled" if row["re_rolled"] else "unrolled"
187 |         group = data_handler.get_weapon_group(x)
188 |         disposition = data_handler.get_disposition(x)
189 |         has_incarnon = data_handler.weapon_has_incarnon(x)
190 |         avg_trade_price = data_handler.get_average_trade_price(x, rolled_status=rerolled)
191 | 
192 |         return pd.Series({
193 |             "group": group,
194 |             "disposition": disposition,
195 |             "has_incarnon": has_incarnon,
196 |             "avg_trade_price": avg_trade_price
197 |         })
198 | 
199 |     # Apply the function to each row
200 |     supplementary_data = df.progress_apply(process_row, axis=1)
201 | 
202 |     # Concatenate the supplementary data with the original data
203 |     df = pd.concat([df, supplementary_data], axis=1)
204 | 
205 |     # Save the updated dataframe back to CSV
206 |     df.to_csv(marketplace_dataframe_file_path, index=False)
207 | 
208 |     print("Added supplementary weapon information: Weapon Group, Disposition, Incarnon, and Average Trade Price.")
209 |     print(f"Total (rows, cols): {df.shape}\n")
210 | 
211 | 
212 | def add_permutation_data() -> None:
213 |     """
214 |     Generates and adds permutation data for weapons to the marketplace weapon data CSV file.
215 | 
216 |     This function reads weapon data from a specified CSV file, creates new rows based on the positive attributes of each weapon,
217 |     and ensures that the generated permutations do not result in duplicates. It skips permutations where at least two of the positive
218 |     attributes are elemental damages, avoiding excessive elemental combinations.
219 | 
220 |     A random factor is introduced to control the volume of new data generated, allowing for the addition of
221 |     artificial entries without overwhelming the dataset.
222 |     """
223 |     random.seed(42)
224 |     EXTRA_ARTIFICIAL_DATA_FACTOR = 0.1
225 | 
226 |     # Load the CSV data
227 |     df = pd.read_csv(marketplace_dataframe_file_path)
228 | 
229 |     new_rows = []
230 |     elemental_attributes = ["heat_damage", "cold_damage", "electric_damage", "toxin_damage"]
231 | 
232 |     pbar = tqdm.tqdm(total=len(df), desc="Permuting attributes", unit="listing")
233 | 
234 |     for index, row in df.iterrows():
235 |         if random.random() > EXTRA_ARTIFICIAL_DATA_FACTOR:
236 |             pbar.update(1)
237 |             pbar.set_postfix(added_permutations=len(new_rows))
238 |             continue
239 | 
240 |         positive_attributes = [row["positive1"], row["positive2"], row["positive3"]]
241 |         positive_attributes = [x for x in positive_attributes if not pd.isna(x)]
242 | 
243 |         # Skip if at least two positives are elemental
244 |         if sum(attr in elemental_attributes for attr in positive_attributes) >= 2:
245 |             pbar.update(1)
246 |             continue
247 | 
248 |         for positive_attribute_perm in itertools.permutations(positive_attributes):
249 |             new_row = row.copy()
250 | 
251 |             # Update the new row with the new positive attributes
252 |             for i in range(len(positive_attribute_perm)):
253 |                 new_row[f"positive{i + 1}"] = positive_attribute_perm[i]
254 | 
255 |             new_rows.append(new_row)
256 | 
257 |         pbar.update(1)  # Update progress bar
258 | 
259 |     # Convert new_rows to a DataFrame and concatenate with original data
260 |     new_rows_df = pd.DataFrame(new_rows)
261 |     df = pd.concat([df, new_rows_df], ignore_index=True)
262 | 
263 |     # Save the updated dataframe back to CSV
264 |     df.to_csv(marketplace_dataframe_file_path, index=False)
265 | 
266 |     print(f"Artificial permutation data created.")
267 |     print(f"New rows added: {len(new_rows)}")
268 |     print(f"Total (rows, cols): {df.shape}\n")
269 | 
270 | 
271 | def remove_duplicate_rows() -> None:
272 |     """
273 |     Removes duplicate rows from the marketplace weapon data CSV file.
274 | 
275 |     This function reads the existing CSV data, drops any duplicate rows,
276 |     and saves the cleaned data back to the CSV file.
277 |     """
278 |     # Load the CSV data
279 |     df = pd.read_csv(marketplace_dataframe_file_path)
280 |     original_size = df.shape[0]
281 | 
282 |     # Remove duplicate rows based on weapon attributes
283 |     cleaned_data = df.drop_duplicates(subset=[
284 |         "weapon_url_name", "polarity", "mod_rank", "re_rolls",
285 |         "positive1", "positive2", "positive3",
286 |         "negative",
287 |         "positive1_value", "positive2_value", "positive3_value",
288 |         "negative_value",
289 |     ])
290 | 
291 |     # Save the cleaned dataframe back to CSV
292 |     cleaned_data.to_csv(marketplace_dataframe_file_path, index=False)
293 | 
294 |     print(f"Duplicate rows removed.")
295 |     print(f"Removed {original_size - cleaned_data.shape[0]} rows.")
296 |     print(f"Total (rows, cols): {cleaned_data.shape}\n")
297 | 
298 | 
299 | def minor_final_adjustments():
300 |     """
301 |     Performs minor final adjustments to the dataset.
302 | 
303 |     This function shuffles the dataset to ensure that the data is in random order.
304 |     """
305 |     df = pd.read_csv(marketplace_dataframe_file_path)
306 | 
307 |     # Shuffle dataset
308 |     df = df.sample(frac=1, random_state=42).reset_index(drop=True)
309 | 
310 |     df.to_csv(marketplace_dataframe_file_path, index=False)
311 | 
312 |     print("Final touches done.")
313 |     print(f"Total (rows, cols): {df.shape}\n")
314 | 
315 | 
316 | def main():
317 |     running = [
318 |         {"run": True, "func": create_df},
319 |         {"run": False, "func": add_days_listed_and_has_sold_columns},
320 |         {"run": True, "func": handle_prices},
321 |         {"run": False, "func": _WIP_create_estimated_trade_price},  # Function is under development
322 |         {"run": False, "func": add_supplementary_weapon_information},
323 |         {"run": False, "func": add_permutation_data},
324 |         {"run": True, "func": remove_duplicate_rows},
325 |         {"run": True, "func": minor_final_adjustments},
326 |     ]
327 | 
328 |     for action in running:
329 |         if action["run"]:
330 |             action["func"]()
331 | 
332 |     print("You may now train the model.")
333 | 
334 | 
335 | if __name__ == "__main__":
336 |     main()
337 | 


--------------------------------------------------------------------------------
/shtuff/data_handler.py:
--------------------------------------------------------------------------------
  1 | from typing import List, Dict, Any, Optional
  2 | 
  3 | from warframe_marketplace_predictor.filepaths import *
  4 | from warframe_marketplace_predictor.shtuff.storage_handling import read_json
  5 | 
  6 | 
  7 | class DataHandler:
  8 | 
  9 |     def __init__(self):
 10 |         self.items_data_mapped_by_item_name = None
 11 |         self.items_data_mapped_by_url_name = None
 12 |         self.name_inverse_mapping = None
 13 |         self.attributes_data = None
 14 |         self.attribute_name_shortcuts = None
 15 |         self.weapon_ranking_information = None
 16 |         self.global_price_freq = None
 17 |         self.ig_weapon_stats = None
 18 |         self.developer_summary_stats = None
 19 | 
 20 |         self.variants = None
 21 | 
 22 |     def load_items(self):
 23 |         """ Load items data and generate mappings. """
 24 |         if self.items_data_mapped_by_item_name is None:
 25 |             self.items_data_mapped_by_item_name = read_json(items_data_file_path)
 26 |             self.items_data_mapped_by_url_name = {v["url_name"]: v for v in
 27 |                                                   self.items_data_mapped_by_item_name.values()}
 28 |             self.name_inverse_mapping = {k: v["url_name"] for k, v in self.items_data_mapped_by_item_name.items()}
 29 |             self.name_inverse_mapping.update({v: k for k, v in self.name_inverse_mapping.items()})
 30 | 
 31 |     def load_attributes(self):
 32 |         """ Load attributes data and generate attribute shortcuts. """
 33 |         if self.attributes_data is None or self.attribute_name_shortcuts is None:
 34 |             if attributes_data_file_path and attribute_name_shortcuts_file_path:
 35 |                 self.attributes_data = read_json(attributes_data_file_path)
 36 |                 self.attribute_name_shortcuts = read_json(attribute_name_shortcuts_file_path)
 37 |                 self.attribute_name_shortcuts.update({v: v for v in self.attribute_name_shortcuts.values()})
 38 | 
 39 |     def load_weapon_ranking_information(self):
 40 |         """ Load weapon ranking information data. """
 41 |         if self.weapon_ranking_information is None:
 42 |             self.weapon_ranking_information = read_json(weapon_ranking_information_file_path)
 43 | 
 44 |     def load_global_price_freq(self):
 45 |         """ Load global price frequency data. """
 46 |         if self.global_price_freq is None:
 47 |             self.global_price_freq = read_json(global_price_freq_file_path)
 48 | 
 49 |     def load_ig_weapon_stats(self):
 50 |         """ Load in-game weapon statistics data. """
 51 |         if self.ig_weapon_stats is None:
 52 |             self.ig_weapon_stats = read_json(ig_weapon_stats_file_path)
 53 | 
 54 |     def load_developer_summary_stats(self):
 55 |         """ Load developer summary statistics data. """
 56 |         if self.developer_summary_stats is None:
 57 |             self.load_items()
 58 |             sum_stats = dict()
 59 |             riven_stats_data = read_json(developer_summary_stats_file_path)
 60 |             for riven_type in riven_stats_data.values():
 61 |                 for weapon_item_name, riven_stats in riven_type.items():
 62 |                     if weapon_item_name not in self.items_data_mapped_by_item_name:
 63 |                         continue
 64 |                     weapon_url_name = self.get_url_name(weapon_item_name)
 65 |                     sum_stats[weapon_url_name] = riven_stats
 66 |             self.developer_summary_stats = sum_stats
 67 | 
 68 |     def get_item_names(self) -> List[str]:
 69 |         self.load_items()
 70 |         return sorted(self.items_data_mapped_by_item_name.keys())
 71 | 
 72 |     def get_url_names(self) -> List[str]:
 73 |         self.load_items()
 74 |         return sorted(self.items_data_mapped_by_url_name.keys())
 75 | 
 76 |     def get_attribute_names(self) -> List[str]:
 77 |         self.load_attributes()
 78 |         return sorted(self.attributes_data.keys())
 79 | 
 80 |     def get_attribute_shortcuts(self) -> List[str]:
 81 |         self.load_attributes()
 82 |         return sorted(self.attribute_name_shortcuts.keys())
 83 | 
 84 |     def get_proper_attribute_name(self, attribute_name: str) -> str:
 85 |         self.load_attributes()
 86 |         return self.attribute_name_shortcuts[attribute_name]
 87 | 
 88 |     def get_url_name(self, weapon_name: str) -> str:
 89 |         self.load_items()
 90 |         if weapon_name in self.items_data_mapped_by_item_name:
 91 |             return self.name_inverse_mapping[weapon_name]
 92 |         elif weapon_name in self.items_data_mapped_by_url_name:
 93 |             return weapon_name
 94 |         else:
 95 |             print("Incorrect weapon name. Displaying possible names:")
 96 |             for x in self.items_data_mapped_by_item_name.keys():
 97 |                 if x[:2].lower() == weapon_name[:2].lower():
 98 |                     print(x)
 99 |             raise ValueError(f"{weapon_name} does not exist.")
100 | 
101 |     def get_item_name(self, weapon_name: str) -> str:
102 |         self.load_items()
103 |         if weapon_name in self.items_data_mapped_by_url_name:
104 |             return self.name_inverse_mapping[weapon_name]
105 |         elif weapon_name in self.items_data_mapped_by_item_name:
106 |             return weapon_name
107 |         else:
108 |             print("Incorrect weapon name. Displaying possible names:")
109 |             for x in self.items_data_mapped_by_item_name.keys():
110 |                 if x[:2].lower() == weapon_name[:2].lower():
111 |                     print(x)
112 |             raise ValueError(f"{weapon_name} does not exist.")
113 | 
114 |     def weapon_exists(self, weapon_name: str) -> bool:
115 |         self.load_items()
116 |         return self.get_url_name(weapon_name) in self.items_data_mapped_by_url_name
117 | 
118 |     def is_valid_attribute_shortcut(self, attribute_name: str) -> bool:
119 |         self.load_attributes()
120 |         return attribute_name in self.attribute_name_shortcuts
121 | 
122 |     def get_weapon_specific_attributes(self, weapon_name: str) -> List[str]:
123 |         self.load_items()
124 |         weapon_name = self.get_url_name(weapon_name)
125 |         weapon_group = self.items_data_mapped_by_url_name[weapon_name]["group"]
126 | 
127 |         melee_attributes = ["damage_vs_corpus", "damage_vs_grineer", "damage_vs_infested", "cold_damage",
128 |                             "channeling_damage", "channeling_efficiency", "combo_duration", "critical_chance",
129 |                             "critical_chance_on_slide_attack", "critical_damage", "base_damage_/_melee_damage",
130 |                             "electric_damage", "heat_damage", "finisher_damage", "fire_rate_/_attack_speed",
131 |                             "impact_damage", "toxin_damage", "puncture_damage", "range", "slash_damage",
132 |                             "status_chance", "status_duration", "chance_to_gain_extra_combo_count",
133 |                             "chance_to_gain_combo_count"]
134 |         gun_attributes = ["ammo_maximum", "damage_vs_corpus", "damage_vs_grineer", "damage_vs_infested", "cold_damage",
135 |                           "critical_chance", "critical_damage", "base_damage_/_melee_damage", "electric_damage",
136 |                           "heat_damage", "fire_rate_/_attack_speed", "projectile_speed", "impact_damage",
137 |                           "magazine_capacity", "multishot", "toxin_damage", "punch_through", "puncture_damage",
138 |                           "reload_speed", "slash_damage", "status_chance", "status_duration", "recoil", "zoom"]
139 | 
140 |         melee_groups = ["zaw", "melee"]
141 |         gun_groups = ["kitgun", "sentinel", "archgun", "primary", "secondary"]
142 | 
143 |         if weapon_group in melee_groups:
144 |             return melee_attributes.copy()
145 |         if weapon_group in gun_groups:
146 |             return gun_attributes.copy()
147 | 
148 |     def get_official_attribute_name(self, attribute_url_name: str) -> str:
149 |         self.load_attributes()
150 |         if attribute_url_name == "<NONE>":
151 |             return ""
152 |         return self.attributes_data[attribute_url_name]["effect"]
153 | 
154 |     def get_weapon_ranking_information(self, weapon_name: str) -> Dict[str, Any]:
155 |         self.load_weapon_ranking_information()
156 |         weapon_name = self.get_url_name(weapon_name)
157 |         rank_data = {"total_ranks": len(self.weapon_ranking_information)}
158 |         rank_data.update(self.weapon_ranking_information[weapon_name])
159 |         return rank_data
160 | 
161 |     def get_global_price_percentile(self, weapon_price: float) -> float:
162 |         self.load_global_price_freq()
163 |         total_freq = sum(self.global_price_freq.values())
164 |         cumulative_freq = sum(freq for price, freq in self.global_price_freq.items() if float(price) <= weapon_price)
165 |         percentile = (cumulative_freq / total_freq) * 100 if total_freq > 0 else 0.0
166 |         return percentile
167 | 
168 |     def determine_variants(self) -> None:
169 |         if self.variants:
170 |             return
171 | 
172 |         self.load_ig_weapon_stats()
173 |         names: List[str] = list(self.ig_weapon_stats)
174 | 
175 |         prefixes = {"Carmine", "Ceti", "Kuva", "Prisma", "Rakta", "Sancti", "Secura", "Synoid", "Telos", "Tenet",
176 |                     "Vaykor"}
177 |         suffixes = {"Prime", "Vandal", "Wraith"}
178 | 
179 |         variants = dict()
180 |         for name in names:
181 |             has_prefix = any(prefix in name for prefix in prefixes)
182 |             has_suffix = any(suffix in name for suffix in suffixes)
183 |             if not has_prefix and not has_suffix:
184 |                 variants[name] = []
185 | 
186 |         for name in names:
187 |             if name not in variants:
188 |                 base_name = " ".join(word for word in name.split() if word not in prefixes and word not in suffixes)
189 |                 if base_name not in variants:
190 |                     variants[name] = []
191 |                 else:
192 |                     variants[base_name].append(name)
193 | 
194 |         self.variants = variants
195 | 
196 |     def get_most_common_variant(self, weapon_name: str) -> Optional[str]:
197 |         self.load_items()
198 |         weapon_name = self.get_item_name(weapon_name)
199 | 
200 |         subjective_best_variants = {
201 |             "Braton": "Braton Vandal",
202 |             "Gorgon": "Prisma Gorgon",
203 |             "Karak": "Kuva Karak",
204 |             "Lato": "Lato",
205 |             "Latron": "Latron Prime",
206 |             "Machete": "Machete Wraith",
207 |             "Penta": "Secura Penta",
208 |             "Skana": "Prisma Skana",
209 |             "Strun": "Strun Prime",
210 |             "Tigris": "Tigris Prime",
211 |         }
212 |         if weapon_name in subjective_best_variants:
213 |             return subjective_best_variants[weapon_name]
214 | 
215 |         self.determine_variants()
216 | 
217 |         if weapon_name not in self.variants:
218 |             return None
219 | 
220 |         if not self.variants[weapon_name]:
221 |             return weapon_name
222 | 
223 |         for variant in self.variants[weapon_name]:
224 |             for x in ["Prime", "Kuva", "Tenet"]:
225 |                 if x in variant:
226 |                     return variant
227 | 
228 |         return self.variants[weapon_name][-1]
229 | 
230 |     def get_disposition(self, weapon_name: str) -> Optional[int]:
231 |         self.load_ig_weapon_stats()
232 |         weapon_name = self.get_url_name(weapon_name)
233 | 
234 |         best_variant = self.get_most_common_variant(weapon_name)
235 |         if best_variant and "disposition" in self.ig_weapon_stats[best_variant]:
236 |             return self.ig_weapon_stats[best_variant]["disposition"]
237 | 
238 |         dispositions = {
239 |             "Verglas": 4,
240 |             "Akaten": 3,
241 |             "Lacerten": 3,
242 |             "Helstrum": 3,
243 |             "Deconstructor": 4,
244 |             "AX-52": 1,
245 |             "Batoten": 3,
246 |             "Laser Rifle": 4,
247 |             "Vermisplicer": 3,
248 |             "Vulklok": 4,
249 |             "Tombfinger": 3,
250 |             "Amanata": 1,
251 |             "Sweeper": 3,
252 |             "Burst Laser": 5,
253 |             "Vulcax": 3,
254 |             "Deth Machine Rifle": 5,
255 |             "Sporelacer": 3,
256 |             "Higasa": 1,
257 |             "Stinger": 5,
258 |             "Dark Split-Sword": 4,
259 |             "Multron": 3,
260 |             "Artax": 3,
261 |             "Tazicor": 3,
262 |             "Gaze": 3,
263 |             "Cryotra": 3,
264 |             "Catchmoon": 3,
265 |             "Rattleguts": 3,
266 |         }
267 |         if (item_name := self.get_item_name(weapon_name)) in dispositions:
268 |             return dispositions[item_name]
269 | 
270 |         return None
271 | 
272 |     def weapon_has_incarnon(self, weapon_name: str) -> bool:
273 |         self.load_items()
274 |         weapon_name = self.get_url_name(weapon_name)
275 |         incarnons = {
276 |             # Week 1 (A)
277 |             "Braton", "Lato", "Skana", "Paris", "Kunai",
278 |             # Week 2 (B)
279 |             "Boar", "Gammacor", "Angstrum", "Gorgon", "Anku",
280 |             # Week 3 (C)
281 |             "Bo", "Latron", "Furis", "Furax", "Strun",
282 |             # Week 4 (D)
283 |             "Lex", "Magistar", "Boltor", "Bronco", "Ceramic Dagger",
284 |             # Week 5 (E)
285 |             "Torid", "Dual Toxocyst", "Dual Ichor", "Miter", "Atomos",
286 |             # Week 6 (F)
287 |             "Ack & Brunt", "Soma", "Vasto", "Nami Solo", "Burston",
288 |             # Week 7 (G)
289 |             "Zylok", "Sibear", "Dread", "Despair", "Hate",
290 |             # Week 8 (H)
291 |             "Dera", "Sybaris", "Cestra", "Sicarus", "Okina",
292 |             # Zariman
293 |             "Felarx", "Innodem", "Laetum", "Phenmor", "Praedos",
294 |             # Sanctum Anatomica
295 |             "Onos", "Ruvox",
296 |         }
297 | 
298 |         return self.get_item_name(weapon_name) in incarnons
299 | 
300 |     def get_summary_stats(self, name: str, rolled_status: str = "rerolled") -> Dict:
301 |         name = self.get_url_name(name)
302 |         return self.developer_summary_stats.get(name, {}).get(rolled_status)
303 | 
304 |     def get_weapon_group(self, name: str) -> str:
305 |         self.load_developer_summary_stats()
306 |         name = self.get_url_name(name)
307 |         de_weapon_group = summary_stats["itemType"].split()[0] if (summary_stats := self.get_summary_stats(name)) \
308 |             else ""
309 |         item_name = self.get_item_name(name)
310 |         marketplace_weapon_group = self.items_data_mapped_by_item_name[item_name]["group"]
311 |         if marketplace_weapon_group == "sentinel":
312 |             weapon_group = "Sentinel"
313 |         elif de_weapon_group == "Shotgun":
314 |             weapon_group = "Shotgun"
315 |         elif de_weapon_group == "":
316 |             weapon_group = {
317 |                 "primary": "Rifle",
318 |                 "secondary": "Pistol",
319 |                 "melee": "Melee",
320 | 
321 |             }.get(marketplace_weapon_group, marketplace_weapon_group.title())
322 |         else:
323 |             weapon_group = de_weapon_group
324 |         return weapon_group
325 | 
326 |     def get_groups(self):
327 |         return sorted(set(map(self.get_weapon_group, self.get_url_names())))
328 | 
329 |     def get_popularity(self, name: str, rolled_status: str = "rerolled") -> Optional[float]:
330 |         self.load_developer_summary_stats()
331 |         name = self.get_item_name(name)
332 | 
333 |         if (summary_stats := self.get_summary_stats(name, rolled_status)) is None:
334 |             return None
335 | 
336 |         return summary_stats.get("pop", 0)
337 | 
338 |     def get_average_trade_price(self, name: str, rolled_status: str = "rerolled") -> Optional[float]:
339 |         self.load_developer_summary_stats()
340 |         name = self.get_url_name(name)
341 | 
342 |         if (summary_stats := self.get_summary_stats(name, rolled_status)) is None:
343 |             return None
344 | 
345 |         return summary_stats.get("avg", 0)
346 | 


--------------------------------------------------------------------------------