├── BERT ├── safetensors-0.4.2.dist-info │ ├── INSTALLER │ ├── METADATA │ ├── RECORD │ └── WHEEL └── safetensors │ ├── __init__.py │ ├── __init__.pyi │ ├── __pycache__ │ ├── __init__.cpython-311.pyc │ ├── flax.cpython-311.pyc │ ├── mlx.cpython-311.pyc │ ├── numpy.cpython-311.pyc │ ├── paddle.cpython-311.pyc │ ├── tensorflow.cpython-311.pyc │ └── torch.cpython-311.pyc │ ├── _safetensors_rust.cp311-win_amd64.pyd │ ├── flax.py │ ├── mlx.py │ ├── numpy.py │ ├── paddle.py │ ├── py.typed │ ├── tensorflow.py │ └── torch.py ├── Newbook.py ├── Novel generator.mp4 ├── bookgenerator.py ├── sample_text.txt └── start.py /BERT/safetensors-0.4.2.dist-info/INSTALLER: -------------------------------------------------------------------------------- 1 | pip 2 | -------------------------------------------------------------------------------- /BERT/safetensors-0.4.2.dist-info/METADATA: -------------------------------------------------------------------------------- 1 | Metadata-Version: 2.1 2 | Name: safetensors 3 | Version: 0.4.2 4 | Classifier: Development Status :: 5 - Production/Stable 5 | Classifier: Intended Audience :: Developers 6 | Classifier: Intended Audience :: Education 7 | Classifier: Intended Audience :: Science/Research 8 | Classifier: License :: OSI Approved :: Apache Software License 9 | Classifier: Operating System :: OS Independent 10 | Classifier: Programming Language :: Python :: 3 11 | Classifier: Programming Language :: Python :: 3.7 12 | Classifier: Programming Language :: Python :: 3.8 13 | Classifier: Programming Language :: Python :: 3.9 14 | Classifier: Programming Language :: Python :: 3.10 15 | Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence 16 | Classifier: Typing :: Typed 17 | Requires-Dist: numpy >=1.21.6 ; extra == 'numpy' 18 | Requires-Dist: safetensors[numpy] ; extra == 'torch' 19 | Requires-Dist: torch >=1.10 ; extra == 'torch' 20 | Requires-Dist: safetensors[numpy] ; extra == 'tensorflow' 21 | Requires-Dist: tensorflow >=2.11.0 ; extra == 'tensorflow' 22 | Requires-Dist: safetensors[numpy] ; extra == 'pinned-tf' 23 | Requires-Dist: tensorflow ==2.11.0 ; extra == 'pinned-tf' 24 | Requires-Dist: safetensors[numpy] ; extra == 'jax' 25 | Requires-Dist: flax >=0.6.3 ; extra == 'jax' 26 | Requires-Dist: jax >=0.3.25 ; extra == 'jax' 27 | Requires-Dist: jaxlib >=0.3.25 ; extra == 'jax' 28 | Requires-Dist: mlx >=0.0.9 ; extra == 'mlx' 29 | Requires-Dist: safetensors[numpy] ; extra == 'paddlepaddle' 30 | Requires-Dist: paddlepaddle >=2.4.1 ; extra == 'paddlepaddle' 31 | Requires-Dist: black ==22.3 ; extra == 'quality' 32 | Requires-Dist: click ==8.0.4 ; extra == 'quality' 33 | Requires-Dist: isort >=5.5.4 ; extra == 'quality' 34 | Requires-Dist: flake8 >=3.8.3 ; extra == 'quality' 35 | Requires-Dist: safetensors[numpy] ; extra == 'testing' 36 | Requires-Dist: h5py >=3.7.0 ; extra == 'testing' 37 | Requires-Dist: huggingface_hub >=0.12.1 ; extra == 'testing' 38 | Requires-Dist: setuptools_rust >=1.5.2 ; extra == 'testing' 39 | Requires-Dist: pytest >=7.2.0 ; extra == 'testing' 40 | Requires-Dist: pytest-benchmark >=4.0.0 ; extra == 'testing' 41 | Requires-Dist: hypothesis >=6.70.2 ; extra == 'testing' 42 | Requires-Dist: safetensors[torch] ; extra == 'all' 43 | Requires-Dist: safetensors[numpy] ; extra == 'all' 44 | Requires-Dist: safetensors[pinned-tf] ; extra == 'all' 45 | Requires-Dist: safetensors[jax] ; extra == 'all' 46 | Requires-Dist: safetensors[paddlepaddle] ; extra == 'all' 47 | Requires-Dist: safetensors[quality] ; extra == 'all' 48 | 
Requires-Dist: safetensors[testing] ; extra == 'all' 49 | Requires-Dist: safetensors[all] ; extra == 'dev' 50 | Provides-Extra: numpy 51 | Provides-Extra: torch 52 | Provides-Extra: tensorflow 53 | Provides-Extra: pinned-tf 54 | Provides-Extra: jax 55 | Provides-Extra: mlx 56 | Provides-Extra: paddlepaddle 57 | Provides-Extra: quality 58 | Provides-Extra: testing 59 | Provides-Extra: all 60 | Provides-Extra: dev 61 | Author-email: Nicolas Patry 62 | Requires-Python: >=3.7 63 | Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM 64 | Project-URL: Homepage, https://github.com/huggingface/safetensors 65 | Project-URL: Source, https://github.com/huggingface/safetensors 66 | 67 | ## Installation 68 | 69 | ``` 70 | pip install safetensors 71 | ``` 72 | 73 | 74 | ## Usage 75 | 76 | ### Numpy 77 | 78 | ```python 79 | from safetensors.numpy import save_file, load_file 80 | import numpy as np 81 | 82 | tensors = { 83 | "a": np.zeros((2, 2)), 84 | "b": np.zeros((2, 3), dtype=np.uint8) 85 | } 86 | 87 | save_file(tensors, "./model.safetensors") 88 | 89 | 90 | # Now loading 91 | loaded = load_file("./model.safetensors") 92 | ``` 93 | 94 | ### Torch 95 | 96 | ```python 97 | from safetensors.torch import save_file, load_file 98 | import torch 99 | 100 | tensors = { 101 | "a": torch.zeros((2, 2)), 102 | "b": torch.zeros((2, 3), dtype=torch.uint8) 103 | } 104 | 105 | save_file(tensors, "./model.safetensors") 106 | 107 | 108 | # Now loading 109 | loaded = load_file("./model.safetensors") 110 | ``` 111 | 112 | ### Developing 113 | 114 | ``` 115 | # inside ./safetensors/bindings/python 116 | pip install .[dev] 117 | ``` 118 | Should be enough to install this library locally. 119 | 120 | ### Testing 121 | 122 | ``` 123 | # inside ./safetensors/bindings/python 124 | pip install .[dev] 125 | pytest -sv tests/ 126 | ``` 127 | 128 | -------------------------------------------------------------------------------- /BERT/safetensors-0.4.2.dist-info/RECORD: -------------------------------------------------------------------------------- 1 | safetensors-0.4.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 2 | safetensors-0.4.2.dist-info/METADATA,sha256=6zMGpEIufl1E1e5fk1bRwfKz-CzL6_W0NhvCe6KFVNM,3902 3 | safetensors-0.4.2.dist-info/RECORD,, 4 | safetensors-0.4.2.dist-info/WHEEL,sha256=QWeP2iNVclb1lqJMYxkB3S_jGRNF2KI-alH1eOqR1Tk,95 5 | safetensors/__init__.py,sha256=xxEn5gc4JWHjA0wnafb0Wwwq1m2QXwlbm5nsItGRqO0,180 6 | safetensors/__init__.pyi,sha256=Q6YltUfa-NjJVWCyolhrT-Yd03xQ4mn52eKKUXdAs4k,2043 7 | safetensors/__pycache__/__init__.cpython-311.pyc,, 8 | safetensors/__pycache__/flax.cpython-311.pyc,, 9 | safetensors/__pycache__/mlx.cpython-311.pyc,, 10 | safetensors/__pycache__/numpy.cpython-311.pyc,, 11 | safetensors/__pycache__/paddle.cpython-311.pyc,, 12 | safetensors/__pycache__/tensorflow.cpython-311.pyc,, 13 | safetensors/__pycache__/torch.cpython-311.pyc,, 14 | safetensors/_safetensors_rust.cp311-win_amd64.pyd,sha256=mXOYeQYjcgJM-rHeEfcSncIwmPEfucLK2ZstxVOdIow,594944 15 | safetensors/flax.py,sha256=lyCfXWTwOqkKvfIbsfy1lTNoslqZm5RjBvvg6d1ZNZU,3984 16 | safetensors/mlx.py,sha256=MfhCBl9M9uW5BnFngRg3CNobi7fZXxS3ulJ9so82cNE,3975 17 | safetensors/numpy.py,sha256=HBAwy9bXQIKM_uk328kXiZ8tAIhtiq4usngqAWu_l8Y,5113 18 | safetensors/paddle.py,sha256=0aZHDE0sR-Dp3Nrwy8Y28hax9yKWbiyot1rfEHfNbvg,4301 19 | safetensors/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 20 | safetensors/tensorflow.py,sha256=hvOLFjirEm-mZlzKcwbeNZutgcKtUxnaEP2VlrGd-Zc,4027 21 | 
safetensors/torch.py,sha256=Ycl2n5LlDMkio6FWQypleGs7lf78-6SL_CpR3WLxSC4,17786 22 | -------------------------------------------------------------------------------- /BERT/safetensors-0.4.2.dist-info/WHEEL: -------------------------------------------------------------------------------- 1 | Wheel-Version: 1.0 2 | Generator: maturin (1.4.0) 3 | Root-Is-Purelib: false 4 | Tag: cp311-none-win_amd64 5 | -------------------------------------------------------------------------------- /BERT/safetensors/__init__.py: -------------------------------------------------------------------------------- 1 | # Re-export this 2 | from ._safetensors_rust import ( # noqa: F401 3 | SafetensorError, 4 | __version__, 5 | deserialize, 6 | safe_open, 7 | serialize, 8 | serialize_file, 9 | ) 10 | -------------------------------------------------------------------------------- /BERT/safetensors/__init__.pyi: -------------------------------------------------------------------------------- 1 | # Generated content DO NOT EDIT 2 | @staticmethod 3 | def deserialize(bytes): 4 | """ 5 | Deserializes the byte content of a safetensors file and returns the raw tensor data 6 | 7 | Args: 8 | data (:obj:`bytes`): 9 | The byte content of a file 10 | 11 | Returns: 12 | (:obj:`List[Tuple[str, Dict[str, Any]]]`): 13 | The deserialized content is like: 14 | [("tensor_name", {"shape": [2, 3], "dtype": "F32", "data": b"\0\0.." }), (...)] 15 | """ 16 | pass 17 | 18 | @staticmethod 19 | def serialize(tensor_dict, metadata=None): 20 | """ 21 | Serializes raw data. 22 | 23 | Args: 24 | tensor_dict (:obj:`Dict[str, Dict[str, Any]]`): 25 | The tensor dict is like: 26 | {"tensor_name": {"dtype": "F32", "shape": [2, 3], "data": b"\0\0"}} 27 | metadata (:obj:`Dict[str, str]`, *optional*): 28 | Optional, purely textual annotations 29 | 30 | Returns: 31 | (:obj:`bytes`): 32 | The serialized content. 33 | """ 34 | pass 35 | 36 | @staticmethod 37 | def serialize_file(tensor_dict, filename, metadata=None): 38 | """ 39 | Serializes raw data directly into a file. 40 | 41 | Args: 42 | tensor_dict (:obj:`Dict[str, Dict[str, Any]]`): 43 | The tensor dict is like: 44 | {"tensor_name": {"dtype": "F32", "shape": [2, 3], "data": b"\0\0"}} 45 | filename (:obj:`str`): 46 | The name of the file to write into. 47 | metadata (:obj:`Dict[str, str]`, *optional*): 48 | Optional, purely textual annotations 49 | 50 | Returns: 51 | (:obj:`None`): 52 | The content is written to the file; nothing is returned. 53 | """ 54 | pass 55 | 56 | class safe_open: 57 | """ 58 | Opens a safetensors file lazily and returns tensors as requested 59 | 60 | Args: 61 | filename (:obj:`str`): 62 | The filename to open 63 | 64 | framework (:obj:`str`): 65 | The framework you want your tensors in. Supported values: 66 | `pt`, `tf`, `flax`, `numpy`. 67 | 68 | device (:obj:`str`, defaults to :obj:`"cpu"`): 69 | The device on which you want the tensors.
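Example (a minimal sketch; "model.safetensors" stands in for any safetensors file on disk):

```python
from safetensors import safe_open

tensors = {}
with safe_open("model.safetensors", framework="pt", device="cpu") as f:
    for key in f.keys():
        tensors[key] = f.get_tensor(key)
```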
70 | """ 71 | 72 | def __init__(self, filename, framework, device="cpu"): 73 | pass 74 | -------------------------------------------------------------------------------- /BERT/safetensors/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Talented0Dev/LLM_Project/1bb8195f0705c52be24fa9b77007d3d01989d404/BERT/safetensors/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /BERT/safetensors/__pycache__/flax.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Talented0Dev/LLM_Project/1bb8195f0705c52be24fa9b77007d3d01989d404/BERT/safetensors/__pycache__/flax.cpython-311.pyc -------------------------------------------------------------------------------- /BERT/safetensors/__pycache__/mlx.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Talented0Dev/LLM_Project/1bb8195f0705c52be24fa9b77007d3d01989d404/BERT/safetensors/__pycache__/mlx.cpython-311.pyc -------------------------------------------------------------------------------- /BERT/safetensors/__pycache__/numpy.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Talented0Dev/LLM_Project/1bb8195f0705c52be24fa9b77007d3d01989d404/BERT/safetensors/__pycache__/numpy.cpython-311.pyc -------------------------------------------------------------------------------- /BERT/safetensors/__pycache__/paddle.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Talented0Dev/LLM_Project/1bb8195f0705c52be24fa9b77007d3d01989d404/BERT/safetensors/__pycache__/paddle.cpython-311.pyc -------------------------------------------------------------------------------- /BERT/safetensors/__pycache__/tensorflow.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Talented0Dev/LLM_Project/1bb8195f0705c52be24fa9b77007d3d01989d404/BERT/safetensors/__pycache__/tensorflow.cpython-311.pyc -------------------------------------------------------------------------------- /BERT/safetensors/__pycache__/torch.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Talented0Dev/LLM_Project/1bb8195f0705c52be24fa9b77007d3d01989d404/BERT/safetensors/__pycache__/torch.cpython-311.pyc -------------------------------------------------------------------------------- /BERT/safetensors/_safetensors_rust.cp311-win_amd64.pyd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Talented0Dev/LLM_Project/1bb8195f0705c52be24fa9b77007d3d01989d404/BERT/safetensors/_safetensors_rust.cp311-win_amd64.pyd -------------------------------------------------------------------------------- /BERT/safetensors/flax.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict, Optional, Union 3 | 4 | import numpy as np 5 | 6 | import jax.numpy as jnp 7 | from jax import Array 8 | from safetensors import numpy, safe_open 9 | 10 | 11 | def save(tensors: Dict[str, Array], metadata: Optional[Dict[str, str]] = None) -> bytes: 12 | """ 13 | Saves a dictionary of 
tensors into raw bytes in safetensors format. 14 | 15 | Args: 16 | tensors (`Dict[str, Array]`): 17 | The incoming tensors. Tensors need to be contiguous and dense. 18 | metadata (`Dict[str, str]`, *optional*, defaults to `None`): 19 | Optional text only metadata you might want to save in your header. 20 | For instance it can be useful to specify more about the underlying 21 | tensors. This is purely informative and does not affect tensor loading. 22 | 23 | Returns: 24 | `bytes`: The raw bytes representing the format 25 | 26 | Example: 27 | 28 | ```python 29 | from safetensors.flax import save 30 | from jax import numpy as jnp 31 | 32 | tensors = {"embedding": jnp.zeros((512, 1024)), "attention": jnp.zeros((256, 256))} 33 | byte_data = save(tensors) 34 | ``` 35 | """ 36 | np_tensors = _jnp2np(tensors) 37 | return numpy.save(np_tensors, metadata=metadata) 38 | 39 | 40 | def save_file( 41 | tensors: Dict[str, Array], 42 | filename: Union[str, os.PathLike], 43 | metadata: Optional[Dict[str, str]] = None, 44 | ) -> None: 45 | """ 46 | Saves a dictionary of tensors into raw bytes in safetensors format. 47 | 48 | Args: 49 | tensors (`Dict[str, Array]`): 50 | The incoming tensors. Tensors need to be contiguous and dense. 51 | filename (`str`, or `os.PathLike`)): 52 | The filename we're saving into. 53 | metadata (`Dict[str, str]`, *optional*, defaults to `None`): 54 | Optional text only metadata you might want to save in your header. 55 | For instance it can be useful to specify more about the underlying 56 | tensors. This is purely informative and does not affect tensor loading. 57 | 58 | Returns: 59 | `None` 60 | 61 | Example: 62 | 63 | ```python 64 | from safetensors.flax import save_file 65 | from jax import numpy as jnp 66 | 67 | tensors = {"embedding": jnp.zeros((512, 1024)), "attention": jnp.zeros((256, 256))} 68 | save_file(tensors, "model.safetensors") 69 | ``` 70 | """ 71 | np_tensors = _jnp2np(tensors) 72 | return numpy.save_file(np_tensors, filename, metadata=metadata) 73 | 74 | 75 | def load(data: bytes) -> Dict[str, Array]: 76 | """ 77 | Loads a safetensors file into flax format from pure bytes. 78 | 79 | Args: 80 | data (`bytes`): 81 | The content of a safetensors file 82 | 83 | Returns: 84 | `Dict[str, Array]`: dictionary that contains name as key, value as `Array` on cpu 85 | 86 | Example: 87 | 88 | ```python 89 | from safetensors.flax import load 90 | 91 | file_path = "./my_folder/bert.safetensors" 92 | with open(file_path, "rb") as f: 93 | data = f.read() 94 | 95 | loaded = load(data) 96 | ``` 97 | """ 98 | flat = numpy.load(data) 99 | return _np2jnp(flat) 100 | 101 | 102 | def load_file(filename: Union[str, os.PathLike]) -> Dict[str, Array]: 103 | """ 104 | Loads a safetensors file into flax format. 
105 | 106 | Args: 107 | filename (`str`, or `os.PathLike`)): 108 | The name of the file which contains the tensors 109 | 110 | Returns: 111 | `Dict[str, Array]`: dictionary that contains name as key, value as `Array` 112 | 113 | Example: 114 | 115 | ```python 116 | from safetensors.flax import load_file 117 | 118 | file_path = "./my_folder/bert.safetensors" 119 | loaded = load_file(file_path) 120 | ``` 121 | """ 122 | result = {} 123 | with safe_open(filename, framework="flax") as f: 124 | for k in f.keys(): 125 | result[k] = f.get_tensor(k) 126 | return result 127 | 128 | 129 | def _np2jnp(numpy_dict: Dict[str, np.ndarray]) -> Dict[str, Array]: 130 | for k, v in numpy_dict.items(): 131 | numpy_dict[k] = jnp.array(v) 132 | return numpy_dict 133 | 134 | 135 | def _jnp2np(jnp_dict: Dict[str, Array]) -> Dict[str, np.array]: 136 | for k, v in jnp_dict.items(): 137 | jnp_dict[k] = np.asarray(v) 138 | return jnp_dict 139 | -------------------------------------------------------------------------------- /BERT/safetensors/mlx.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict, Optional, Union 3 | 4 | import numpy as np 5 | 6 | import mlx.core as mx 7 | from safetensors import numpy, safe_open 8 | 9 | 10 | def save(tensors: Dict[str, mx.array], metadata: Optional[Dict[str, str]] = None) -> bytes: 11 | """ 12 | Saves a dictionary of tensors into raw bytes in safetensors format. 13 | 14 | Args: 15 | tensors (`Dict[str, mx.array]`): 16 | The incoming tensors. Tensors need to be contiguous and dense. 17 | metadata (`Dict[str, str]`, *optional*, defaults to `None`): 18 | Optional text only metadata you might want to save in your header. 19 | For instance it can be useful to specify more about the underlying 20 | tensors. This is purely informative and does not affect tensor loading. 21 | 22 | Returns: 23 | `bytes`: The raw bytes representing the format 24 | 25 | Example: 26 | 27 | ```python 28 | from safetensors.mlx import save 29 | import mlx.core as mx 30 | 31 | tensors = {"embedding": mx.zeros((512, 1024)), "attention": mx.zeros((256, 256))} 32 | byte_data = save(tensors) 33 | ``` 34 | """ 35 | np_tensors = _mx2np(tensors) 36 | return numpy.save(np_tensors, metadata=metadata) 37 | 38 | 39 | def save_file( 40 | tensors: Dict[str, mx.array], 41 | filename: Union[str, os.PathLike], 42 | metadata: Optional[Dict[str, str]] = None, 43 | ) -> None: 44 | """ 45 | Saves a dictionary of tensors into raw bytes in safetensors format. 46 | 47 | Args: 48 | tensors (`Dict[str, mx.array]`): 49 | The incoming tensors. Tensors need to be contiguous and dense. 50 | filename (`str`, or `os.PathLike`)): 51 | The filename we're saving into. 52 | metadata (`Dict[str, str]`, *optional*, defaults to `None`): 53 | Optional text only metadata you might want to save in your header. 54 | For instance it can be useful to specify more about the underlying 55 | tensors. This is purely informative and does not affect tensor loading. 56 | 57 | Returns: 58 | `None` 59 | 60 | Example: 61 | 62 | ```python 63 | from safetensors.mlx import save_file 64 | import mlx.core as mx 65 | 66 | tensors = {"embedding": mx.zeros((512, 1024)), "attention": mx.zeros((256, 256))} 67 | save_file(tensors, "model.safetensors") 68 | ``` 69 | """ 70 | np_tensors = _mx2np(tensors) 71 | return numpy.save_file(np_tensors, filename, metadata=metadata) 72 | 73 | 74 | def load(data: bytes) -> Dict[str, mx.array]: 75 | """ 76 | Loads a safetensors file into MLX format from pure bytes. 
77 | 78 | Args: 79 | data (`bytes`): 80 | The content of a safetensors file 81 | 82 | Returns: 83 | `Dict[str, mx.array]`: dictionary that contains name as key, value as `mx.array` 84 | 85 | Example: 86 | 87 | ```python 88 | from safetensors.mlx import load 89 | 90 | file_path = "./my_folder/bert.safetensors" 91 | with open(file_path, "rb") as f: 92 | data = f.read() 93 | 94 | loaded = load(data) 95 | ``` 96 | """ 97 | flat = numpy.load(data) 98 | return _np2mx(flat) 99 | 100 | 101 | def load_file(filename: Union[str, os.PathLike]) -> Dict[str, mx.array]: 102 | """ 103 | Loads a safetensors file into MLX format. 104 | 105 | Args: 106 | filename (`str`, or `os.PathLike`)): 107 | The name of the file which contains the tensors 108 | 109 | Returns: 110 | `Dict[str, mx.array]`: dictionary that contains name as key, value as `mx.array` 111 | 112 | Example: 113 | 114 | ```python 115 | from safetensors.mlx import load_file 116 | 117 | file_path = "./my_folder/bert.safetensors" 118 | loaded = load_file(file_path) 119 | ``` 120 | """ 121 | result = {} 122 | with safe_open(filename, framework="mlx") as f: 123 | for k in f.keys(): 124 | result[k] = f.get_tensor(k) 125 | return result 126 | 127 | 128 | def _np2mx(numpy_dict: Dict[str, np.ndarray]) -> Dict[str, mx.array]: 129 | for k, v in numpy_dict.items(): 130 | numpy_dict[k] = mx.array(v) 131 | return numpy_dict 132 | 133 | 134 | def _mx2np(mx_dict: Dict[str, mx.array]) -> Dict[str, np.array]: 135 | new_dict = {} 136 | for k, v in mx_dict.items(): 137 | new_dict[k] = np.asarray(v) 138 | return new_dict 139 | -------------------------------------------------------------------------------- /BERT/safetensors/numpy.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from typing import Dict, Optional, Union 4 | 5 | import numpy as np 6 | 7 | from safetensors import deserialize, safe_open, serialize, serialize_file 8 | 9 | 10 | def _tobytes(tensor: np.ndarray) -> bytes: 11 | if not _is_little_endian(tensor): 12 | tensor = tensor.byteswap(inplace=False) 13 | return tensor.tobytes() 14 | 15 | 16 | def save(tensor_dict: Dict[str, np.ndarray], metadata: Optional[Dict[str, str]] = None) -> bytes: 17 | """ 18 | Saves a dictionary of tensors into raw bytes in safetensors format. 19 | 20 | Args: 21 | tensor_dict (`Dict[str, np.ndarray]`): 22 | The incoming tensors. Tensors need to be contiguous and dense. 23 | metadata (`Dict[str, str]`, *optional*, defaults to `None`): 24 | Optional text only metadata you might want to save in your header. 25 | For instance it can be useful to specify more about the underlying 26 | tensors. This is purely informative and does not affect tensor loading. 27 | 28 | Returns: 29 | `bytes`: The raw bytes representing the format 30 | 31 | Example: 32 | 33 | ```python 34 | from safetensors.numpy import save 35 | import numpy as np 36 | 37 | tensors = {"a": np.zeros((2, 2)), "b": np.zeros((2, 3), dtype=np.uint8)} 38 | byte_data = save(tensors) 39 | ``` 40 | """ 41 | flattened = {k: {"dtype": v.dtype.name, "shape": v.shape, "data": _tobytes(v)} for k, v in tensor_dict.items()} 42 | serialized = serialize(flattened, metadata=metadata) 43 | result = bytes(serialized) 44 | return result 45 | 46 | 47 | def save_file( 48 | tensor_dict: Dict[str, np.ndarray], filename: Union[str, os.PathLike], metadata: Optional[Dict[str, str]] = None 49 | ) -> None: 50 | """ 51 | Saves a dictionary of tensors into raw bytes in safetensors format.
52 | 53 | Args: 54 | tensor_dict (`Dict[str, np.ndarray]`): 55 | The incoming tensors. Tensors need to be contiguous and dense. 56 | filename (`str`, or `os.PathLike`)): 57 | The filename we're saving into. 58 | metadata (`Dict[str, str]`, *optional*, defaults to `None`): 59 | Optional text only metadata you might want to save in your header. 60 | For instance it can be useful to specify more about the underlying 61 | tensors. This is purely informative and does not affect tensor loading. 62 | 63 | Returns: 64 | `None` 65 | 66 | Example: 67 | 68 | ```python 69 | from safetensors.numpy import save_file 70 | import numpy as np 71 | 72 | tensors = {"embedding": np.zeros((512, 1024)), "attention": np.zeros((256, 256))} 73 | save_file(tensors, "model.safetensors") 74 | ``` 75 | """ 76 | flattened = {k: {"dtype": v.dtype.name, "shape": v.shape, "data": _tobytes(v)} for k, v in tensor_dict.items()} 77 | serialize_file(flattened, filename, metadata=metadata) 78 | 79 | 80 | def load(data: bytes) -> Dict[str, np.ndarray]: 81 | """ 82 | Loads a safetensors file into numpy format from pure bytes. 83 | 84 | Args: 85 | data (`bytes`): 86 | The content of a safetensors file 87 | 88 | Returns: 89 | `Dict[str, np.ndarray]`: dictionary that contains name as key, value as `np.ndarray` on cpu 90 | 91 | Example: 92 | 93 | ```python 94 | from safetensors.numpy import load 95 | 96 | file_path = "./my_folder/bert.safetensors" 97 | with open(file_path, "rb") as f: 98 | data = f.read() 99 | 100 | loaded = load(data) 101 | ``` 102 | """ 103 | flat = deserialize(data) 104 | return _view2np(flat) 105 | 106 | 107 | def load_file(filename: Union[str, os.PathLike]) -> Dict[str, np.ndarray]: 108 | """ 109 | Loads a safetensors file into numpy format. 110 | 111 | Args: 112 | filename (`str`, or `os.PathLike`)): 113 | The name of the file which contains the tensors 114 | 115 | Returns: 116 | `Dict[str, np.ndarray]`: dictionary that contains name as key, value as `np.ndarray` 117 | 118 | Example: 119 | 120 | ```python 121 | from safetensors.numpy import load_file 122 | 123 | file_path = "./my_folder/bert.safetensors" 124 | loaded = load_file(file_path) 125 | ``` 126 | """ 127 | result = {} 128 | with safe_open(filename, framework="np") as f: 129 | for k in f.keys(): 130 | result[k] = f.get_tensor(k) 131 | return result 132 | 133 | 134 | _TYPES = { 135 | "F64": np.float64, 136 | "F32": np.float32, 137 | "F16": np.float16, 138 | "I64": np.int64, 139 | "U64": np.uint64, 140 | "I32": np.int32, 141 | "U32": np.uint32, 142 | "I16": np.int16, 143 | "U16": np.uint16, 144 | "I8": np.int8, 145 | "U8": np.uint8, 146 | "BOOL": bool, 147 | } 148 | 149 | 150 | def _getdtype(dtype_str: str) -> np.dtype: 151 | return _TYPES[dtype_str] 152 | 153 | 154 | def _view2np(safeview) -> Dict[str, np.ndarray]: 155 | result = {} 156 | for k, v in safeview: 157 | dtype = _getdtype(v["dtype"]) 158 | arr = np.frombuffer(v["data"], dtype=dtype).reshape(v["shape"]) 159 | result[k] = arr 160 | return result 161 | 162 | 163 | def _is_little_endian(tensor: np.ndarray) -> bool: 164 | byteorder = tensor.dtype.byteorder 165 | if byteorder == "=": 166 | if sys.byteorder == "little": 167 | return True 168 | else: 169 | return False 170 | elif byteorder == "|": 171 | return True 172 | elif byteorder == "<": 173 | return True 174 | elif byteorder == ">": 175 | return False 176 | raise ValueError(f"Unexpected byte order {byteorder}") 177 | -------------------------------------------------------------------------------- /BERT/safetensors/paddle.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict, Optional, Union 3 | 4 | import numpy as np 5 | 6 | import paddle 7 | from safetensors import numpy 8 | 9 | 10 | def save(tensors: Dict[str, paddle.Tensor], metadata: Optional[Dict[str, str]] = None) -> bytes: 11 | """ 12 | Saves a dictionary of tensors into raw bytes in safetensors format. 13 | 14 | Args: 15 | tensors (`Dict[str, paddle.Tensor]`): 16 | The incoming tensors. Tensors need to be contiguous and dense. 17 | metadata (`Dict[str, str]`, *optional*, defaults to `None`): 18 | Optional text only metadata you might want to save in your header. 19 | For instance it can be useful to specify more about the underlying 20 | tensors. This is purely informative and does not affect tensor loading. 21 | 22 | Returns: 23 | `bytes`: The raw bytes representing the format 24 | 25 | Example: 26 | 27 | ```python 28 | from safetensors.paddle import save 29 | import paddle 30 | 31 | tensors = {"embedding": paddle.zeros((512, 1024)), "attention": paddle.zeros((256, 256))} 32 | byte_data = save(tensors) 33 | ``` 34 | """ 35 | np_tensors = _paddle2np(tensors) 36 | return numpy.save(np_tensors, metadata=metadata) 37 | 38 | 39 | def save_file( 40 | tensors: Dict[str, paddle.Tensor], 41 | filename: Union[str, os.PathLike], 42 | metadata: Optional[Dict[str, str]] = None, 43 | ) -> None: 44 | """ 45 | Saves a dictionary of tensors into raw bytes in safetensors format. 46 | 47 | Args: 48 | tensors (`Dict[str, paddle.Tensor]`): 49 | The incoming tensors. Tensors need to be contiguous and dense. 50 | filename (`str`, or `os.PathLike`)): 51 | The filename we're saving into. 52 | metadata (`Dict[str, str]`, *optional*, defaults to `None`): 53 | Optional text only metadata you might want to save in your header. 54 | For instance it can be useful to specify more about the underlying 55 | tensors. This is purely informative and does not affect tensor loading. 56 | 57 | Returns: 58 | `None` 59 | 60 | Example: 61 | 62 | ```python 63 | from safetensors.paddle import save_file 64 | import paddle 65 | 66 | tensors = {"embedding": paddle.zeros((512, 1024)), "attention": paddle.zeros((256, 256))} 67 | save_file(tensors, "model.safetensors") 68 | ``` 69 | """ 70 | np_tensors = _paddle2np(tensors) 71 | return numpy.save_file(np_tensors, filename, metadata=metadata) 72 | 73 | 74 | def load(data: bytes, device: str = "cpu") -> Dict[str, paddle.Tensor]: 75 | """ 76 | Loads a safetensors file into paddle format from pure bytes. 77 | 78 | Args: 79 | data (`bytes`): 80 | The content of a safetensors file 81 | 82 | Returns: 83 | `Dict[str, paddle.Tensor]`: dictionary that contains name as key, value as `paddle.Tensor` on cpu 84 | 85 | Example: 86 | 87 | ```python 88 | from safetensors.paddle import load 89 | 90 | file_path = "./my_folder/bert.safetensors" 91 | with open(file_path, "rb") as f: 92 | data = f.read() 93 | 94 | loaded = load(data) 95 | ``` 96 | """ 97 | flat = numpy.load(data) 98 | return _np2paddle(flat, device) 99 | 100 | 101 | def load_file(filename: Union[str, os.PathLike], device="cpu") -> Dict[str, paddle.Tensor]: 102 | """ 103 | Loads a safetensors file into paddle format. 104 | 105 | Args: 106 | filename (`str`, or `os.PathLike`)): 107 | The name of the file which contains the tensors 108 | device (`str`, *optional*, defaults to `cpu`): 109 | The device where the tensors need to be located after load.
110 | available options are all regular paddle device locations 111 | 112 | Returns: 113 | `Dict[str, paddle.Tensor]`: dictionary that contains name as key, value as `paddle.Tensor` 114 | 115 | Example: 116 | 117 | ```python 118 | from safetensors.paddle import load_file 119 | 120 | file_path = "./my_folder/bert.safetensors" 121 | loaded = load_file(file_path) 122 | ``` 123 | """ 124 | flat = numpy.load_file(filename) 125 | output = _np2paddle(flat, device) 126 | return output 127 | 128 | 129 | def _np2paddle(numpy_dict: Dict[str, np.ndarray], device: str = "cpu") -> Dict[str, paddle.Tensor]: 130 | for k, v in numpy_dict.items(): 131 | numpy_dict[k] = paddle.to_tensor(v, place=device) 132 | return numpy_dict 133 | 134 | 135 | def _paddle2np(paddle_dict: Dict[str, paddle.Tensor]) -> Dict[str, np.array]: 136 | for k, v in paddle_dict.items(): 137 | paddle_dict[k] = v.detach().cpu().numpy() 138 | return paddle_dict 139 | -------------------------------------------------------------------------------- /BERT/safetensors/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Talented0Dev/LLM_Project/1bb8195f0705c52be24fa9b77007d3d01989d404/BERT/safetensors/py.typed -------------------------------------------------------------------------------- /BERT/safetensors/tensorflow.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict, Optional, Union 3 | 4 | import numpy as np 5 | import tensorflow as tf 6 | 7 | from safetensors import numpy, safe_open 8 | 9 | 10 | def save(tensors: Dict[str, tf.Tensor], metadata: Optional[Dict[str, str]] = None) -> bytes: 11 | """ 12 | Saves a dictionary of tensors into raw bytes in safetensors format. 13 | 14 | Args: 15 | tensors (`Dict[str, tf.Tensor]`): 16 | The incoming tensors. Tensors need to be contiguous and dense. 17 | metadata (`Dict[str, str]`, *optional*, defaults to `None`): 18 | Optional text only metadata you might want to save in your header. 19 | For instance it can be useful to specify more about the underlying 20 | tensors. This is purely informative and does not affect tensor loading. 21 | 22 | Returns: 23 | `bytes`: The raw bytes representing the format 24 | 25 | Example: 26 | 27 | ```python 28 | from safetensors.tensorflow import save 29 | import tensorflow as tf 30 | 31 | tensors = {"embedding": tf.zeros((512, 1024)), "attention": tf.zeros((256, 256))} 32 | byte_data = save(tensors) 33 | ``` 34 | """ 35 | np_tensors = _tf2np(tensors) 36 | return numpy.save(np_tensors, metadata=metadata) 37 | 38 | 39 | def save_file( 40 | tensors: Dict[str, tf.Tensor], 41 | filename: Union[str, os.PathLike], 42 | metadata: Optional[Dict[str, str]] = None, 43 | ) -> None: 44 | """ 45 | Saves a dictionary of tensors into raw bytes in safetensors format. 46 | 47 | Args: 48 | tensors (`Dict[str, tf.Tensor]`): 49 | The incoming tensors. Tensors need to be contiguous and dense. 50 | filename (`str`, or `os.PathLike`)): 51 | The filename we're saving into. 52 | metadata (`Dict[str, str]`, *optional*, defaults to `None`): 53 | Optional text only metadata you might want to save in your header. 54 | For instance it can be useful to specify more about the underlying 55 | tensors. This is purely informative and does not affect tensor loading. 
56 | 57 | Returns: 58 | `None` 59 | 60 | Example: 61 | 62 | ```python 63 | from safetensors.tensorflow import save_file 64 | import tensorflow as tf 65 | 66 | tensors = {"embedding": tf.zeros((512, 1024)), "attention": tf.zeros((256, 256))} 67 | save_file(tensors, "model.safetensors") 68 | ``` 69 | """ 70 | np_tensors = _tf2np(tensors) 71 | return numpy.save_file(np_tensors, filename, metadata=metadata) 72 | 73 | 74 | def load(data: bytes) -> Dict[str, tf.Tensor]: 75 | """ 76 | Loads a safetensors file into tensorflow format from pure bytes. 77 | 78 | Args: 79 | data (`bytes`): 80 | The content of a safetensors file 81 | 82 | Returns: 83 | `Dict[str, tf.Tensor]`: dictionary that contains name as key, value as `tf.Tensor` on cpu 84 | 85 | Example: 86 | 87 | ```python 88 | from safetensors.tensorflow import load 89 | 90 | file_path = "./my_folder/bert.safetensors" 91 | with open(file_path, "rb") as f: 92 | data = f.read() 93 | 94 | loaded = load(data) 95 | ``` 96 | """ 97 | flat = numpy.load(data) 98 | return _np2tf(flat) 99 | 100 | 101 | def load_file(filename: Union[str, os.PathLike]) -> Dict[str, tf.Tensor]: 102 | """ 103 | Loads a safetensors file into tensorflow format. 104 | 105 | Args: 106 | filename (`str`, or `os.PathLike`)): 107 | The name of the file which contains the tensors 108 | 109 | Returns: 110 | `Dict[str, tf.Tensor]`: dictionary that contains name as key, value as `tf.Tensor` 111 | 112 | Example: 113 | 114 | ```python 115 | from safetensors.tensorflow import load_file 116 | 117 | file_path = "./my_folder/bert.safetensors" 118 | loaded = load_file(file_path) 119 | ``` 120 | """ 121 | result = {} 122 | with safe_open(filename, framework="tf") as f: 123 | for k in f.keys(): 124 | result[k] = f.get_tensor(k) 125 | return result 126 | 127 | 128 | def _np2tf(numpy_dict: Dict[str, np.ndarray]) -> Dict[str, tf.Tensor]: 129 | for k, v in numpy_dict.items(): 130 | numpy_dict[k] = tf.convert_to_tensor(v) 131 | return numpy_dict 132 | 133 | 134 | def _tf2np(tf_dict: Dict[str, tf.Tensor]) -> Dict[str, np.array]: 135 | for k, v in tf_dict.items(): 136 | tf_dict[k] = v.numpy() 137 | return tf_dict 138 | -------------------------------------------------------------------------------- /BERT/safetensors/torch.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from collections import defaultdict 4 | from typing import Any, Dict, List, Optional, Set, Tuple, Union 5 | 6 | import torch 7 | 8 | from safetensors import deserialize, safe_open, serialize, serialize_file 9 | 10 | 11 | def storage_ptr(tensor: torch.Tensor) -> int: 12 | try: 13 | return tensor.untyped_storage().data_ptr() 14 | except Exception: 15 | # Fallback for torch==1.10 16 | try: 17 | return tensor.storage().data_ptr() 18 | except NotImplementedError: 19 | # Fallback for meta storage 20 | return 0 21 | 22 | 23 | def _end_ptr(tensor: torch.Tensor) -> int: 24 | if tensor.nelement(): 25 | stop = tensor.view(-1)[-1].data_ptr() + _SIZE[tensor.dtype] 26 | else: 27 | stop = tensor.data_ptr() 28 | return stop 29 | 30 | 31 | def storage_size(tensor: torch.Tensor) -> int: 32 | try: 33 | return tensor.untyped_storage().nbytes() 34 | except AttributeError: 35 | # Fallback for torch==1.10 36 | try: 37 | return tensor.storage().size() * _SIZE[tensor.dtype] 38 | except NotImplementedError: 39 | # Fallback for meta storage 40 | # On torch >=2.0 this is the tensor size 41 | return tensor.nelement() * _SIZE[tensor.dtype] 42 | 43 | 44 | def _filter_shared_not_shared(tensors: 
List[Set[str]], state_dict: Dict[str, torch.Tensor]) -> List[Set[str]]: 45 | filtered_tensors = [] 46 | for shared in tensors: 47 | if len(shared) < 2: 48 | filtered_tensors.append(shared) 49 | continue 50 | 51 | areas = [] 52 | for name in shared: 53 | tensor = state_dict[name] 54 | areas.append((tensor.data_ptr(), _end_ptr(tensor), name)) 55 | areas.sort() 56 | 57 | _, last_stop, last_name = areas[0] 58 | filtered_tensors.append({last_name}) 59 | for start, stop, name in areas[1:]: 60 | if start >= last_stop: 61 | filtered_tensors.append({name}) 62 | else: 63 | filtered_tensors[-1].add(name) 64 | last_stop = stop 65 | 66 | return filtered_tensors 67 | 68 | 69 | def _find_shared_tensors(state_dict: Dict[str, torch.Tensor]) -> List[Set[str]]: 70 | tensors = defaultdict(set) 71 | for k, v in state_dict.items(): 72 | if v.device != torch.device("meta") and storage_ptr(v) != 0 and storage_size(v) != 0: 73 | # Need to add device as key because of multiple GPU. 74 | tensors[(v.device, storage_ptr(v), storage_size(v))].add(k) 75 | tensors = list(sorted(tensors.values())) 76 | tensors = _filter_shared_not_shared(tensors, state_dict) 77 | return tensors 78 | 79 | 80 | def _is_complete(tensor: torch.Tensor) -> bool: 81 | return tensor.data_ptr() == storage_ptr(tensor) and tensor.nelement() * _SIZE[tensor.dtype] == storage_size(tensor) 82 | 83 | 84 | def _remove_duplicate_names( 85 | state_dict: Dict[str, torch.Tensor], 86 | *, 87 | preferred_names: Optional[List[str]] = None, 88 | discard_names: Optional[List[str]] = None, 89 | ) -> Dict[str, List[str]]: 90 | if preferred_names is None: 91 | preferred_names = [] 92 | preferred_names = set(preferred_names) 93 | if discard_names is None: 94 | discard_names = [] 95 | discard_names = set(discard_names) 96 | 97 | shareds = _find_shared_tensors(state_dict) 98 | to_remove = defaultdict(list) 99 | for shared in shareds: 100 | complete_names = set([name for name in shared if _is_complete(state_dict[name])]) 101 | if not complete_names: 102 | raise RuntimeError( 103 | "Error while trying to find names to remove to save state dict, but found no suitable name to keep" 104 | f" for saving amongst: {shared}. None is covering the entire storage.Refusing to save/load the model" 105 | " since you could be storing much more memory than needed. Please refer to" 106 | " https://huggingface.co/docs/safetensors/torch_shared_tensors for more information. Or open an" 107 | " issue." 108 | ) 109 | 110 | keep_name = sorted(list(complete_names))[0] 111 | 112 | # Mechanism to preferentially select keys to keep 113 | # coming from the on-disk file to allow 114 | # loading models saved with a different choice 115 | # of keep_name 116 | preferred = complete_names.difference(discard_names) 117 | if preferred: 118 | keep_name = sorted(list(preferred))[0] 119 | 120 | if preferred_names: 121 | preferred = preferred_names.intersection(complete_names) 122 | if preferred: 123 | keep_name = sorted(list(preferred))[0] 124 | for name in sorted(shared): 125 | if name != keep_name: 126 | to_remove[keep_name].append(name) 127 | return to_remove 128 | 129 | 130 | def save_model( 131 | model: torch.nn.Module, filename: str, metadata: Optional[Dict[str, str]] = None, force_contiguous: bool = True 132 | ): 133 | """ 134 | Saves a given torch model to specified filename. 135 | This method exists specifically to avoid tensor sharing issues which are 136 | not allowed in `safetensors`. 
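Example (a minimal sketch; `model` stands in for any `torch.nn.Module` instance):

```python
from safetensors.torch import save_model

save_model(model, "model.safetensors")
```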
[More information on tensor sharing](../torch_shared_tensors) 137 | 138 | Args: 139 | model (`torch.nn.Module`): 140 | The model to save on disk. 141 | filename (`str`): 142 | The filename location to save the file 143 | metadata (`Dict[str, str]`, *optional*): 144 | Extra information to save along with the file. 145 | Some metadata will be added for each dropped tensors. 146 | This information will not be enough to recover the entire 147 | shared structure but might help understanding things 148 | force_contiguous (`boolean`, *optional*, defaults to True): 149 | Forcing the state_dict to be saved as contiguous tensors. 150 | This has no effect on the correctness of the model, but it 151 | could potentially change performance if the layout of the tensor 152 | was chosen specifically for that reason. 153 | """ 154 | state_dict = model.state_dict() 155 | to_removes = _remove_duplicate_names(state_dict) 156 | 157 | for kept_name, to_remove_group in to_removes.items(): 158 | for to_remove in to_remove_group: 159 | if metadata is None: 160 | metadata = {} 161 | 162 | if to_remove not in metadata: 163 | # Do not override user data 164 | metadata[to_remove] = kept_name 165 | del state_dict[to_remove] 166 | if force_contiguous: 167 | state_dict = {k: v.contiguous() for k, v in state_dict.items()} 168 | try: 169 | save_file(state_dict, filename, metadata=metadata) 170 | except ValueError as e: 171 | msg = str(e) 172 | msg += " Or use save_model(..., force_contiguous=True), read the docs for potential caveats." 173 | raise ValueError(msg) 174 | 175 | 176 | def load_model(model: torch.nn.Module, filename: Union[str, os.PathLike], strict=True) -> Tuple[List[str], List[str]]: 177 | """ 178 | Loads a given filename onto a torch model. 179 | This method exists specifically to avoid tensor sharing issues which are 180 | not allowed in `safetensors`. [More information on tensor sharing](../torch_shared_tensors) 181 | 182 | Args: 183 | model (`torch.nn.Module`): 184 | The model to load onto. 185 | filename (`str`, or `os.PathLike`): 186 | The filename location to load the file from. 187 | strict (`bool`, *optional*, defaults to True): 188 | Wether to fail if you're missing keys or having unexpected ones 189 | When false, the function simply returns missing and unexpected names. 190 | 191 | Returns: 192 | `(missing, unexpected): (List[str], List[str])` 193 | `missing` are names in the model which were not modified during loading 194 | `unexpected` are names that are on the file, but weren't used during 195 | the load. 
196 | """ 197 | state_dict = load_file(filename) 198 | model_state_dict = model.state_dict() 199 | to_removes = _remove_duplicate_names(model_state_dict, preferred_names=state_dict.keys()) 200 | missing, unexpected = model.load_state_dict(state_dict, strict=False) 201 | missing = set(missing) 202 | for to_remove_group in to_removes.values(): 203 | for to_remove in to_remove_group: 204 | if to_remove not in missing: 205 | unexpected.append(to_remove) 206 | else: 207 | missing.remove(to_remove) 208 | if strict and (missing or unexpected): 209 | missing_keys = ", ".join([f'"{k}"' for k in sorted(missing)]) 210 | unexpected_keys = ", ".join([f'"{k}"' for k in sorted(unexpected)]) 211 | error = f"Error(s) in loading state_dict for {model.__class__.__name__}:" 212 | if missing: 213 | error += f"\n Missing key(s) in state_dict: {missing_keys}" 214 | if unexpected: 215 | error += f"\n Unexpected key(s) in state_dict: {unexpected_keys}" 216 | raise RuntimeError(error) 217 | return missing, unexpected 218 | 219 | 220 | def save(tensors: Dict[str, torch.Tensor], metadata: Optional[Dict[str, str]] = None) -> bytes: 221 | """ 222 | Saves a dictionary of tensors into raw bytes in safetensors format. 223 | 224 | Args: 225 | tensors (`Dict[str, torch.Tensor]`): 226 | The incoming tensors. Tensors need to be contiguous and dense. 227 | metadata (`Dict[str, str]`, *optional*, defaults to `None`): 228 | Optional text only metadata you might want to save in your header. 229 | For instance it can be useful to specify more about the underlying 230 | tensors. This is purely informative and does not affect tensor loading. 231 | 232 | Returns: 233 | `bytes`: The raw bytes representing the format 234 | 235 | Example: 236 | 237 | ```python 238 | from safetensors.torch import save 239 | import torch 240 | 241 | tensors = {"embedding": torch.zeros((512, 1024)), "attention": torch.zeros((256, 256))} 242 | byte_data = save(tensors) 243 | ``` 244 | """ 245 | serialized = serialize(_flatten(tensors), metadata=metadata) 246 | result = bytes(serialized) 247 | return result 248 | 249 | 250 | def save_file( 251 | tensors: Dict[str, torch.Tensor], 252 | filename: Union[str, os.PathLike], 253 | metadata: Optional[Dict[str, str]] = None, 254 | ): 255 | """ 256 | Saves a dictionary of tensors into raw bytes in safetensors format. 257 | 258 | Args: 259 | tensors (`Dict[str, torch.Tensor]`): 260 | The incoming tensors. Tensors need to be contiguous and dense. 261 | filename (`str`, or `os.PathLike`)): 262 | The filename we're saving into. 263 | metadata (`Dict[str, str]`, *optional*, defaults to `None`): 264 | Optional text only metadata you might want to save in your header. 265 | For instance it can be useful to specify more about the underlying 266 | tensors. This is purely informative and does not affect tensor loading. 267 | 268 | Returns: 269 | `None` 270 | 271 | Example: 272 | 273 | ```python 274 | from safetensors.torch import save_file 275 | import torch 276 | 277 | tensors = {"embedding": torch.zeros((512, 1024)), "attention": torch.zeros((256, 256))} 278 | save_file(tensors, "model.safetensors") 279 | ``` 280 | """ 281 | serialize_file(_flatten(tensors), filename, metadata=metadata) 282 | 283 | 284 | def load_file(filename: Union[str, os.PathLike], device="cpu") -> Dict[str, torch.Tensor]: 285 | """ 286 | Loads a safetensors file into torch format. 
287 | 288 | Args: 289 | filename (`str`, or `os.PathLike`): 290 | The name of the file which contains the tensors 291 | device (`str`, *optional*, defaults to `cpu`): 292 | The device where the tensors need to be located after load. 293 | available options are all regular torch device locations 294 | 295 | Returns: 296 | `Dict[str, torch.Tensor]`: dictionary that contains name as key, value as `torch.Tensor` 297 | 298 | Example: 299 | 300 | ```python 301 | from safetensors.torch import load_file 302 | 303 | file_path = "./my_folder/bert.safetensors" 304 | loaded = load_file(file_path) 305 | ``` 306 | """ 307 | result = {} 308 | with safe_open(filename, framework="pt", device=device) as f: 309 | for k in f.keys(): 310 | result[k] = f.get_tensor(k) 311 | return result 312 | 313 | 314 | def load(data: bytes) -> Dict[str, torch.Tensor]: 315 | """ 316 | Loads a safetensors file into torch format from pure bytes. 317 | 318 | Args: 319 | data (`bytes`): 320 | The content of a safetensors file 321 | 322 | Returns: 323 | `Dict[str, torch.Tensor]`: dictionary that contains name as key, value as `torch.Tensor` on cpu 324 | 325 | Example: 326 | 327 | ```python 328 | from safetensors.torch import load 329 | 330 | file_path = "./my_folder/bert.safetensors" 331 | with open(file_path, "rb") as f: 332 | data = f.read() 333 | 334 | loaded = load(data) 335 | ``` 336 | """ 337 | flat = deserialize(data) 338 | return _view2torch(flat) 339 | 340 | # torch.float8 formats require 2.1; we do not support these dtypes on earlier versions 341 | _float8_e4m3fn = getattr(torch, "float8_e4m3fn", None) 342 | _float8_e5m2 = getattr(torch, "float8_e5m2", None) 343 | 344 | _SIZE = { 345 | torch.int64: 8, 346 | torch.float32: 4, 347 | torch.int32: 4, 348 | torch.bfloat16: 2, 349 | torch.float16: 2, 350 | torch.int16: 2, 351 | torch.uint8: 1, 352 | torch.int8: 1, 353 | torch.bool: 1, 354 | torch.float64: 8, 355 | _float8_e4m3fn: 1, 356 | _float8_e5m2: 1, 357 | } 358 | 359 | _TYPES = { 360 | "F64": torch.float64, 361 | "F32": torch.float32, 362 | "F16": torch.float16, 363 | "BF16": torch.bfloat16, 364 | "I64": torch.int64, 365 | # "U64": torch.uint64, 366 | "I32": torch.int32, 367 | # "U32": torch.uint32, 368 | "I16": torch.int16, 369 | # "U16": torch.uint16, 370 | "I8": torch.int8, 371 | "U8": torch.uint8, 372 | "BOOL": torch.bool, 373 | "F8_E4M3": _float8_e4m3fn, 374 | "F8_E5M2": _float8_e5m2, 375 | } 376 | 377 | 378 | def _getdtype(dtype_str: str) -> torch.dtype: 379 | return _TYPES[dtype_str] 380 | 381 | 382 | def _view2torch(safeview) -> Dict[str, torch.Tensor]: 383 | result = {} 384 | for k, v in safeview: 385 | dtype = _getdtype(v["dtype"]) 386 | arr = torch.frombuffer(v["data"], dtype=dtype).reshape(v["shape"]) 387 | if sys.byteorder == "big": 388 | arr = torch.from_numpy(arr.numpy().byteswap(inplace=False)) 389 | result[k] = arr 390 | 391 | return result 392 | 393 | 394 | def _tobytes(tensor: torch.Tensor, name: str) -> bytes: 395 | if tensor.layout != torch.strided: 396 | raise ValueError( 397 | f"You are trying to save a sparse tensor: `{name}` which this library does not support." 398 | " You can make it a dense tensor before saving with `.to_dense()` but be aware this might" 399 | " make a much larger file than needed." 400 | ) 401 | 402 | if not tensor.is_contiguous(): 403 | raise ValueError( 404 | f"You are trying to save a non contiguous tensor: `{name}` which is not allowed.
It either means you" 405 | " are trying to save tensors which are reference of each other in which case it's recommended to save" 406 | " only the full tensors, and reslice at load time, or simply call `.contiguous()` on your tensor to" 407 | " pack it before saving." 408 | ) 409 | if tensor.device.type != "cpu": 410 | # Moving tensor to cpu before saving 411 | tensor = tensor.to("cpu") 412 | 413 | import ctypes 414 | 415 | import numpy as np 416 | 417 | # When shape is empty (scalar), np.prod returns a float 418 | # we need a int for the following calculations 419 | length = int(np.prod(tensor.shape).item()) 420 | bytes_per_item = _SIZE[tensor.dtype] 421 | 422 | total_bytes = length * bytes_per_item 423 | 424 | ptr = tensor.data_ptr() 425 | if ptr == 0: 426 | return b"" 427 | newptr = ctypes.cast(ptr, ctypes.POINTER(ctypes.c_ubyte)) 428 | data = np.ctypeslib.as_array(newptr, (total_bytes,)) # no internal copy 429 | if sys.byteorder == "big": 430 | NPDTYPES = { 431 | torch.int64: np.int64, 432 | torch.float32: np.float32, 433 | torch.int32: np.int32, 434 | # XXX: This is ok because both have the same width 435 | torch.bfloat16: np.float16, 436 | torch.float16: np.float16, 437 | torch.int16: np.int16, 438 | torch.uint8: np.uint8, 439 | torch.int8: np.int8, 440 | torch.bool: bool, 441 | torch.float64: np.float64, 442 | # XXX: This is ok because both have the same width and byteswap is a no-op anyway 443 | _float8_e4m3fn: np.uint8, 444 | _float8_e5m2: np.uint8, 445 | } 446 | npdtype = NPDTYPES[tensor.dtype] 447 | # Not in place as that would potentially modify a live running model 448 | data = data.view(npdtype).byteswap(inplace=False) 449 | return data.tobytes() 450 | 451 | 452 | def _flatten(tensors: Dict[str, torch.Tensor]) -> Dict[str, Dict[str, Any]]: 453 | if not isinstance(tensors, dict): 454 | raise ValueError(f"Expected a dict of [str, torch.Tensor] but received {type(tensors)}") 455 | 456 | invalid_tensors = [] 457 | for k, v in tensors.items(): 458 | if not isinstance(v, torch.Tensor): 459 | raise ValueError(f"Key `{k}` is invalid, expected torch.Tensor but received {type(v)}") 460 | 461 | if v.layout != torch.strided: 462 | invalid_tensors.append(k) 463 | if invalid_tensors: 464 | raise ValueError( 465 | f"You are trying to save a sparse tensors: `{invalid_tensors}` which this library does not support." 466 | " You can make it a dense tensor before saving with `.to_dense()` but be aware this might" 467 | " make a much larger file than needed." 468 | ) 469 | 470 | shared_pointers = _find_shared_tensors(tensors) 471 | failing = [] 472 | for names in shared_pointers: 473 | if len(names) > 1: 474 | failing.append(names) 475 | 476 | if failing: 477 | raise RuntimeError( 478 | f""" 479 | Some tensors share memory, this will lead to duplicate memory on disk and potential differences when loading them again: {failing}. 480 | A potential way to correctly save your model is to use `save_model`. 
481 | More information at https://huggingface.co/docs/safetensors/torch_shared_tensors 482 | """ 483 | ) 484 | 485 | return { 486 | k: { 487 | "dtype": str(v.dtype).split(".")[-1], 488 | "shape": v.shape, 489 | "data": _tobytes(v, k), 490 | } 491 | for k, v in tensors.items() 492 | } 493 | -------------------------------------------------------------------------------- /Newbook.py: -------------------------------------------------------------------------------- 1 | import tkinter as tk 2 | from tkinter import ttk 3 | from tkinter import scrolledtext 4 | from transformers import GPT2LMHeadModel, GPT2Tokenizer 5 | 6 | 7 | 8 | class NovelGeneratorApp(tk.Tk): 9 | def __init__(self): 10 | super().__init__() 11 | self.title("Novel Generator") 12 | self.geometry("800x600") 13 | 14 | self.create_widgets() 15 | 16 | def create_widgets(self): 17 | self.theme_label = ttk.Label(self, text="Theme:") 18 | self.theme_label.grid(row=0, column=0, sticky="w") 19 | self.theme_entry = ttk.Entry(self, width=50) 20 | self.theme_entry.grid(row=0, column=1, padx=5, pady=5) 21 | 22 | self.synopsis_label = ttk.Label(self, text="Synopsis:") 23 | self.synopsis_label.grid(row=1, column=0, sticky="w") 24 | self.synopsis_text = scrolledtext.ScrolledText(self, width=50, height=5) 25 | self.synopsis_text.grid(row=1, column=1, padx=5, pady=5) 26 | 27 | self.words_label = ttk.Label(self, text="Number of Words:") 28 | self.words_label.grid(row=2, column=0, sticky="w") 29 | self.words_entry = ttk.Entry(self) 30 | self.words_entry.grid(row=2, column=1, padx=5, pady=5) 31 | 32 | self.chapters_label = ttk.Label(self, text="Number of Chapters:") 33 | self.chapters_label.grid(row=3, column=0, sticky="w") 34 | self.chapters_entry = ttk.Entry(self) 35 | self.chapters_entry.grid(row=3, column=1, padx=5, pady=5) 36 | 37 | self.generate_button = ttk.Button(self, text="Generate Novel", command=self.generate_novel) 38 | self.generate_button.grid(row=4, column=0, columnspan=2, pady=10) 39 | 40 | self.generated_novel_label = ttk.Label(self, text="Generated Novel:") 41 | self.generated_novel_label.grid(row=5, column=0, sticky="w") 42 | self.generated_novel_text = scrolledtext.ScrolledText(self, width=80, height=20) 43 | self.generated_novel_text.grid(row=6, column=0, columnspan=2, padx=5, pady=5) 44 | 45 | def generate_novel(self): 46 | theme = self.theme_entry.get() 47 | synopsis = self.synopsis_text.get("1.0", tk.END) 48 | num_words = int(self.words_entry.get()) 49 | num_chapters = int(self.chapters_entry.get()) 50 | 51 | novel_content = self.generate_novel_content(theme, synopsis, num_words, num_chapters) 52 | 53 | self.generated_novel_text.delete("1.0", tk.END) 54 | self.generated_novel_text.insert(tk.END, novel_content) 55 | 56 | def generate_novel_content(self, theme, synopsis, num_words, num_chapters): 57 | novel_content = f"Theme: {theme}\nSynopsis: {synopsis}\n\n" 58 | tokenizer = GPT2Tokenizer.from_pretrained("gpt2") 59 | model = GPT2LMHeadModel.from_pretrained("gpt2") 60 | for chapter in range(1, num_chapters + 1): 61 | novel_content += f"Chapter {chapter}:\n" 62 | chapter_text = self.generate_chapter_content(theme, synopsis, num_words, chapter, tokenizer, model) 63 | novel_content += chapter_text + "\n\n" 64 | return novel_content 65 | 66 | def generate_chapter_content(self, theme, synopsis, num_words, chapter, tokenizer, model): 67 | input_text = f"{theme}. {synopsis}. Chapter {chapter}. " # vary the prompt per chapter so chapters do not come out identical
" 68 | input_ids = tokenizer.encode(input_text, return_tensors="pt") 69 | max_length = num_words + len(input_ids[0]) 70 | output = model.generate(input_ids, max_length=max_length, num_return_sequences=1, pad_token_id=tokenizer.eos_token_id) 71 | chapter_text = tokenizer.decode(output[0], skip_special_tokens=True) 72 | return chapter_text 73 | 74 | if __name__ == "__main__": 75 | app = NovelGeneratorApp() 76 | app.mainloop() 77 | -------------------------------------------------------------------------------- /Novel generator.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Talented0Dev/LLM_Project/1bb8195f0705c52be24fa9b77007d3d01989d404/Novel generator.mp4 -------------------------------------------------------------------------------- /bookgenerator.py: -------------------------------------------------------------------------------- 1 | import tkinter as tk 2 | from tkinter import ttk 3 | from tkinter import scrolledtext 4 | from tkinter import messagebox 5 | import ebooklib 6 | from ebooklib import epub 7 | from transformers import GPT2LMHeadModel, GPT2Tokenizer 8 | # https://www.upwork.com/ab/proposals/job/~01cbcbb64654457e34/apply/ 9 | # pip install torch torchvision torchaudio 10 | class NovelGeneratorApp(tk.Tk): 11 | def __init__(self): 12 | super().__init__() 13 | self.title("Novel Generator") 14 | self.geometry("800x600") 15 | 16 | self.create_widgets() 17 | 18 | def create_widgets(self): 19 | # Theme and Synopsis 20 | self.theme_label = ttk.Label(self, text="Theme:") 21 | self.theme_label.grid(row=0, column=0, sticky="w") 22 | self.theme_entry = ttk.Entry(self, width=50) 23 | self.theme_entry.grid(row=0, column=1, padx=5, pady=5) 24 | 25 | self.synopsis_label = ttk.Label(self, text="Synopsis:") 26 | self.synopsis_label.grid(row=1, column=0, sticky="w") 27 | self.synopsis_text = scrolledtext.ScrolledText(self, width=50, height=5) 28 | self.synopsis_text.grid(row=1, column=1, padx=5, pady=5) 29 | 30 | # Number of Words and Chapters 31 | self.words_label = ttk.Label(self, text="Number of Words:") 32 | self.words_label.grid(row=2, column=0, sticky="w") 33 | self.words_entry = ttk.Entry(self) 34 | self.words_entry.grid(row=2, column=1, padx=5, pady=5) 35 | 36 | self.chapters_label = ttk.Label(self, text="Number of Chapters:") 37 | self.chapters_label.grid(row=3, column=0, sticky="w") 38 | self.chapters_entry = ttk.Entry(self) 39 | self.chapters_entry.grid(row=3, column=1, padx=5, pady=5) 40 | 41 | # Generate Button 42 | self.generate_button = ttk.Button(self, text="Generate Novel", command=self.generate_novel) 43 | self.generate_button.grid(row=4, column=0, columnspan=2, pady=10) 44 | 45 | # Generated Novel Display 46 | self.generated_novel_label = ttk.Label(self, text="Generated Novel:") 47 | self.generated_novel_label.grid(row=5, column=0, sticky="w") 48 | self.generated_novel_text = scrolledtext.ScrolledText(self, width=80, height=20) 49 | self.generated_novel_text.grid(row=6, column=0, columnspan=2, padx=5, pady=5) 50 | 51 | def generate_novel(self): 52 | # Get user input 53 | theme = self.theme_entry.get() 54 | synopsis = self.synopsis_text.get("1.0", tk.END) 55 | num_words = int(self.words_entry.get()) 56 | num_chapters = int(self.chapters_entry.get()) 57 | 58 | # Generate novel content using ChatGPT 59 | novel_content = self.generate_novel_content(theme, synopsis, num_words, num_chapters) 60 | 61 | # Display generated novel 62 | self.generated_novel_text.delete("1.0", tk.END) 63 | 
--------------------------------------------------------------------------------
/sample_text.txt:
--------------------------------------------------------------------------------
waht are
--------------------------------------------------------------------------------
/start.py:
--------------------------------------------------------------------------------
import tkinter as tk
from tkinter import filedialog
from tkinter import scrolledtext

import torch
from transformers import BertForQuestionAnswering, BertTokenizer


class LLMApp(tk.Tk):
    def __init__(self):
        super().__init__()
        self.title("LLM Question Answering App")
        self.geometry("600x400")

        self.create_widgets()

    def create_widgets(self):
        self.question_label = tk.Label(self, text="Enter your question:")
        self.question_label.pack()

        self.question_entry = tk.Entry(self, width=50)
        self.question_entry.pack()

        self.load_file_button = tk.Button(self, text="Load File", command=self.load_file)
        self.load_file_button.pack()

        self.text_display = scrolledtext.ScrolledText(self, width=60, height=15)
        self.text_display.pack()

        self.answer_label = tk.Label(self, text="Answer:")
        self.answer_label.pack()

        self.answer_display = tk.Label(self, text="")
        self.answer_display.pack()

        self.answer_button = tk.Button(self, text="Get Answer", command=self.get_answer)
        self.answer_button.pack()

    def load_file(self):
        file_path = filedialog.askopenfilename()
        if file_path:
            with open(file_path, "r", encoding="utf-8") as file:
                text = file.read()
            self.text_display.delete("1.0", tk.END)
            self.text_display.insert(tk.END, text)

    def get_answer(self):
        question = self.question_entry.get()
        if not question:
            self.answer_display.config(text="Please enter a question.")
            return

        file_text = self.text_display.get("1.0", tk.END)
        if not file_text.strip():
            self.answer_display.config(text="Please load a file.")
            return

        answer = self.answer_question_from_text(file_text, question)
        self.answer_display.config(text=answer)

    def answer_question_from_text(self, text, question):
        # Specify the directory where you extracted the model files
        model_directory = "/path/to/your/model/files"

        # Load the model and tokenizer from the manually downloaded files
        model = BertForQuestionAnswering.from_pretrained(model_directory)
        tokenizer = BertTokenizer.from_pretrained(model_directory)

        inputs = tokenizer(question, text, return_tensors="pt", max_length=512, truncation=True)

        # Recent transformers versions return a ModelOutput object rather than
        # a (start_scores, end_scores) tuple, so read the logits by name.
        with torch.no_grad():
            outputs = model(**inputs)
        start_scores = outputs.start_logits
        end_scores = outputs.end_logits

        all_tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
        answer_tokens = all_tokens[torch.argmax(start_scores) : torch.argmax(end_scores) + 1]
        answer = tokenizer.convert_tokens_to_string(answer_tokens)

        return answer


if __name__ == "__main__":
    app = LLMApp()
    app.mainloop()
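start.py drives BERT by hand; the transformers `pipeline` helper wraps the same tokenize, forward-pass, and span-decoding steps in a single call. A minimal sketch, assuming a model directory laid out like the script's own placeholder path and the repository's `sample_text.txt` as context:

```python
from transformers import pipeline

# Same local directory as in start.py; the placeholder path stands in
# for wherever the BERT QA weights actually live.
model_directory = "/path/to/your/model/files"
qa = pipeline("question-answering", model=model_directory, tokenizer=model_directory)

with open("sample_text.txt", encoding="utf-8") as f:
    context = f.read()

result = qa(question="What is this text about?", context=context)
print(result["answer"], round(result["score"], 3))
```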
--------------------------------------------------------------------------------