├── BERT ├── safetensors-0.4.2.dist-info │ ├── INSTALLER │ ├── METADATA │ ├── RECORD │ └── WHEEL └── safetensors │ ├── __init__.py │ ├── __init__.pyi │ ├── __pycache__ │ ├── __init__.cpython-311.pyc │ ├── flax.cpython-311.pyc │ ├── mlx.cpython-311.pyc │ ├── numpy.cpython-311.pyc │ ├── paddle.cpython-311.pyc │ ├── tensorflow.cpython-311.pyc │ └── torch.cpython-311.pyc │ ├── _safetensors_rust.cp311-win_amd64.pyd │ ├── flax.py │ ├── mlx.py │ ├── numpy.py │ ├── paddle.py │ ├── py.typed │ ├── tensorflow.py │ └── torch.py ├── Newbook.py ├── Novel generator.mp4 ├── bookgenerator.py ├── sample_text.txt └── start.py /BERT/safetensors-0.4.2.dist-info/INSTALLER: -------------------------------------------------------------------------------- 1 | pip 2 | -------------------------------------------------------------------------------- /BERT/safetensors-0.4.2.dist-info/METADATA: -------------------------------------------------------------------------------- 1 | Metadata-Version: 2.1 2 | Name: safetensors 3 | Version: 0.4.2 4 | Classifier: Development Status :: 5 - Production/Stable 5 | Classifier: Intended Audience :: Developers 6 | Classifier: Intended Audience :: Education 7 | Classifier: Intended Audience :: Science/Research 8 | Classifier: License :: OSI Approved :: Apache Software License 9 | Classifier: Operating System :: OS Independent 10 | Classifier: Programming Language :: Python :: 3 11 | Classifier: Programming Language :: Python :: 3.7 12 | Classifier: Programming Language :: Python :: 3.8 13 | Classifier: Programming Language :: Python :: 3.9 14 | Classifier: Programming Language :: Python :: 3.10 15 | Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence 16 | Classifier: Typing :: Typed 17 | Requires-Dist: numpy >=1.21.6 ; extra == 'numpy' 18 | Requires-Dist: safetensors[numpy] ; extra == 'torch' 19 | Requires-Dist: torch >=1.10 ; extra == 'torch' 20 | Requires-Dist: safetensors[numpy] ; extra == 'tensorflow' 21 | Requires-Dist: tensorflow >=2.11.0 ; extra == 'tensorflow' 22 | Requires-Dist: safetensors[numpy] ; extra == 'pinned-tf' 23 | Requires-Dist: tensorflow ==2.11.0 ; extra == 'pinned-tf' 24 | Requires-Dist: safetensors[numpy] ; extra == 'jax' 25 | Requires-Dist: flax >=0.6.3 ; extra == 'jax' 26 | Requires-Dist: jax >=0.3.25 ; extra == 'jax' 27 | Requires-Dist: jaxlib >=0.3.25 ; extra == 'jax' 28 | Requires-Dist: mlx >=0.0.9 ; extra == 'mlx' 29 | Requires-Dist: safetensors[numpy] ; extra == 'paddlepaddle' 30 | Requires-Dist: paddlepaddle >=2.4.1 ; extra == 'paddlepaddle' 31 | Requires-Dist: black ==22.3 ; extra == 'quality' 32 | Requires-Dist: click ==8.0.4 ; extra == 'quality' 33 | Requires-Dist: isort >=5.5.4 ; extra == 'quality' 34 | Requires-Dist: flake8 >=3.8.3 ; extra == 'quality' 35 | Requires-Dist: safetensors[numpy] ; extra == 'testing' 36 | Requires-Dist: h5py >=3.7.0 ; extra == 'testing' 37 | Requires-Dist: huggingface_hub >=0.12.1 ; extra == 'testing' 38 | Requires-Dist: setuptools_rust >=1.5.2 ; extra == 'testing' 39 | Requires-Dist: pytest >=7.2.0 ; extra == 'testing' 40 | Requires-Dist: pytest-benchmark >=4.0.0 ; extra == 'testing' 41 | Requires-Dist: hypothesis >=6.70.2 ; extra == 'testing' 42 | Requires-Dist: safetensors[torch] ; extra == 'all' 43 | Requires-Dist: safetensors[numpy] ; extra == 'all' 44 | Requires-Dist: safetensors[pinned-tf] ; extra == 'all' 45 | Requires-Dist: safetensors[jax] ; extra == 'all' 46 | Requires-Dist: safetensors[paddlepaddle] ; extra == 'all' 47 | Requires-Dist: safetensors[quality] ; extra == 'all' 48 | 
Requires-Dist: safetensors[testing] ; extra == 'all' 49 | Requires-Dist: safetensors[all] ; extra == 'dev' 50 | Provides-Extra: numpy 51 | Provides-Extra: torch 52 | Provides-Extra: tensorflow 53 | Provides-Extra: pinned-tf 54 | Provides-Extra: jax 55 | Provides-Extra: mlx 56 | Provides-Extra: paddlepaddle 57 | Provides-Extra: quality 58 | Provides-Extra: testing 59 | Provides-Extra: all 60 | Provides-Extra: dev 61 | Author-email: Nicolas Patry 62 | Requires-Python: >=3.7 63 | Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM 64 | Project-URL: Homepage, https://github.com/huggingface/safetensors 65 | Project-URL: Source, https://github.com/huggingface/safetensors 66 | 67 | ## Installation 68 | 69 | ``` 70 | pip install safetensors 71 | ``` 72 | 73 | 74 | ## Usage 75 | 76 | ### Numpy 77 | 78 | ```python 79 | from safetensors.numpy import save_file, load_file 80 | import numpy as np 81 | 82 | tensors = { 83 | "a": np.zeros((2, 2)), 84 | "b": np.zeros((2, 3), dtype=np.uint8) 85 | } 86 | 87 | save_file(tensors, "./model.safetensors") 88 | 89 | 90 | # Now loading 91 | loaded = load_file("./model.safetensors") 92 | ``` 93 | 94 | ### Torch 95 | 96 | ```python 97 | from safetensors.torch import save_file, load_file 98 | import torch 99 | 100 | tensors = { 101 | "a": torch.zeros((2, 2)), 102 | "b": torch.zeros((2, 3), dtype=torch.uint8) 103 | } 104 | 105 | save_file(tensors, "./model.safetensors") 106 | 107 | 108 | # Now loading 109 | loaded = load_file("./model.safetensors") 110 | ``` 111 | 112 | ### Developing 113 | 114 | ``` 115 | # inside ./safetensors/bindings/python 116 | pip install .[dev] 117 | ``` 118 | Should be enough to install this library locally. 119 | 120 | ### Testing 121 | 122 | ``` 123 | # inside ./safetensors/bindings/python 124 | pip install .[dev] 125 | pytest -sv tests/ 126 | ``` 127 | 128 | -------------------------------------------------------------------------------- /BERT/safetensors-0.4.2.dist-info/RECORD: -------------------------------------------------------------------------------- 1 | safetensors-0.4.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 2 | safetensors-0.4.2.dist-info/METADATA,sha256=6zMGpEIufl1E1e5fk1bRwfKz-CzL6_W0NhvCe6KFVNM,3902 3 | safetensors-0.4.2.dist-info/RECORD,, 4 | safetensors-0.4.2.dist-info/WHEEL,sha256=QWeP2iNVclb1lqJMYxkB3S_jGRNF2KI-alH1eOqR1Tk,95 5 | safetensors/__init__.py,sha256=xxEn5gc4JWHjA0wnafb0Wwwq1m2QXwlbm5nsItGRqO0,180 6 | safetensors/__init__.pyi,sha256=Q6YltUfa-NjJVWCyolhrT-Yd03xQ4mn52eKKUXdAs4k,2043 7 | safetensors/__pycache__/__init__.cpython-311.pyc,, 8 | safetensors/__pycache__/flax.cpython-311.pyc,, 9 | safetensors/__pycache__/mlx.cpython-311.pyc,, 10 | safetensors/__pycache__/numpy.cpython-311.pyc,, 11 | safetensors/__pycache__/paddle.cpython-311.pyc,, 12 | safetensors/__pycache__/tensorflow.cpython-311.pyc,, 13 | safetensors/__pycache__/torch.cpython-311.pyc,, 14 | safetensors/_safetensors_rust.cp311-win_amd64.pyd,sha256=mXOYeQYjcgJM-rHeEfcSncIwmPEfucLK2ZstxVOdIow,594944 15 | safetensors/flax.py,sha256=lyCfXWTwOqkKvfIbsfy1lTNoslqZm5RjBvvg6d1ZNZU,3984 16 | safetensors/mlx.py,sha256=MfhCBl9M9uW5BnFngRg3CNobi7fZXxS3ulJ9so82cNE,3975 17 | safetensors/numpy.py,sha256=HBAwy9bXQIKM_uk328kXiZ8tAIhtiq4usngqAWu_l8Y,5113 18 | safetensors/paddle.py,sha256=0aZHDE0sR-Dp3Nrwy8Y28hax9yKWbiyot1rfEHfNbvg,4301 19 | safetensors/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 20 | safetensors/tensorflow.py,sha256=hvOLFjirEm-mZlzKcwbeNZutgcKtUxnaEP2VlrGd-Zc,4027 21 | 
safetensors/torch.py,sha256=Ycl2n5LlDMkio6FWQypleGs7lf78-6SL_CpR3WLxSC4,17786 22 | -------------------------------------------------------------------------------- /BERT/safetensors-0.4.2.dist-info/WHEEL: -------------------------------------------------------------------------------- 1 | Wheel-Version: 1.0 2 | Generator: maturin (1.4.0) 3 | Root-Is-Purelib: false 4 | Tag: cp311-none-win_amd64 5 | -------------------------------------------------------------------------------- /BERT/safetensors/__init__.py: -------------------------------------------------------------------------------- 1 | # Re-export this 2 | from ._safetensors_rust import ( # noqa: F401 3 | SafetensorError, 4 | __version__, 5 | deserialize, 6 | safe_open, 7 | serialize, 8 | serialize_file, 9 | ) 10 | -------------------------------------------------------------------------------- /BERT/safetensors/__init__.pyi: -------------------------------------------------------------------------------- 1 | # Generated content DO NOT EDIT 2 | @staticmethod 3 | def deserialize(bytes): 4 | """ 5 | Deserializes the byte content of a safetensors file and returns the raw tensor data 6 | 7 | Args: 8 | data (:obj:`bytes`): 9 | The byte content of a file 10 | 11 | Returns: 12 | (:obj:`List[Tuple[str, Dict[str, Any]]]`): 13 | The deserialized content is like: 14 | [("tensor_name", {"shape": [2, 3], "dtype": "F32", "data": b"\0\0.." }), (...)] 15 | """ 16 | pass 17 | 18 | @staticmethod 19 | def serialize(tensor_dict, metadata=None): 20 | """ 21 | Serializes raw data. 22 | 23 | Args: 24 | tensor_dict (:obj:`Dict[str, Dict[str, Any]]`): 25 | The tensor dict is like: 26 | {"tensor_name": {"dtype": "F32", "shape": [2, 3], "data": b"\0\0"}} 27 | metadata (:obj:`Dict[str, str]`, *optional*): 28 | Optional, purely textual annotations 29 | 30 | Returns: 31 | (:obj:`bytes`): 32 | The serialized content. 33 | """ 34 | pass 35 | 36 | @staticmethod 37 | def serialize_file(tensor_dict, filename, metadata=None): 38 | """ 39 | Serializes raw data directly into a file. 40 | 41 | Args: 42 | tensor_dict (:obj:`Dict[str, Dict[str, Any]]`): 43 | The tensor dict is like: 44 | {"tensor_name": {"dtype": "F32", "shape": [2, 3], "data": b"\0\0"}} 45 | filename (:obj:`str`): 46 | The name of the file to write into. 47 | metadata (:obj:`Dict[str, str]`, *optional*): 48 | Optional, purely textual annotations 49 | 50 | Returns: 51 | (:obj:`None`): 52 | The content is written to the file; nothing is returned. 53 | """ 54 | pass 55 | 56 | class safe_open: 57 | """ 58 | Opens a safetensors file lazily and returns tensors as requested 59 | 60 | Args: 61 | filename (:obj:`str`): 62 | The filename to open 63 | 64 | framework (:obj:`str`): 65 | The framework you want your tensors in. Supported values: 66 | `pt`, `tf`, `flax`, `numpy`. 67 | 68 | device (:obj:`str`, defaults to :obj:`"cpu"`): 69 | The device on which you want the tensors.
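Example (a minimal sketch; "model.safetensors" stands in for any safetensors file on disk):

```python
from safetensors import safe_open

tensors = {}
with safe_open("model.safetensors", framework="pt", device="cpu") as f:
    for key in f.keys():
        tensors[key] = f.get_tensor(key)
```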
70 | """ 71 | 72 | def __init__(self, filename, framework, device="cpu"): 73 | pass 74 | -------------------------------------------------------------------------------- /BERT/safetensors/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Talented0Dev/LLM_Project/1bb8195f0705c52be24fa9b77007d3d01989d404/BERT/safetensors/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /BERT/safetensors/__pycache__/flax.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Talented0Dev/LLM_Project/1bb8195f0705c52be24fa9b77007d3d01989d404/BERT/safetensors/__pycache__/flax.cpython-311.pyc -------------------------------------------------------------------------------- /BERT/safetensors/__pycache__/mlx.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Talented0Dev/LLM_Project/1bb8195f0705c52be24fa9b77007d3d01989d404/BERT/safetensors/__pycache__/mlx.cpython-311.pyc -------------------------------------------------------------------------------- /BERT/safetensors/__pycache__/numpy.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Talented0Dev/LLM_Project/1bb8195f0705c52be24fa9b77007d3d01989d404/BERT/safetensors/__pycache__/numpy.cpython-311.pyc -------------------------------------------------------------------------------- /BERT/safetensors/__pycache__/paddle.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Talented0Dev/LLM_Project/1bb8195f0705c52be24fa9b77007d3d01989d404/BERT/safetensors/__pycache__/paddle.cpython-311.pyc -------------------------------------------------------------------------------- /BERT/safetensors/__pycache__/tensorflow.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Talented0Dev/LLM_Project/1bb8195f0705c52be24fa9b77007d3d01989d404/BERT/safetensors/__pycache__/tensorflow.cpython-311.pyc -------------------------------------------------------------------------------- /BERT/safetensors/__pycache__/torch.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Talented0Dev/LLM_Project/1bb8195f0705c52be24fa9b77007d3d01989d404/BERT/safetensors/__pycache__/torch.cpython-311.pyc -------------------------------------------------------------------------------- /BERT/safetensors/_safetensors_rust.cp311-win_amd64.pyd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Talented0Dev/LLM_Project/1bb8195f0705c52be24fa9b77007d3d01989d404/BERT/safetensors/_safetensors_rust.cp311-win_amd64.pyd -------------------------------------------------------------------------------- /BERT/safetensors/flax.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict, Optional, Union 3 | 4 | import numpy as np 5 | 6 | import jax.numpy as jnp 7 | from jax import Array 8 | from safetensors import numpy, safe_open 9 | 10 | 11 | def save(tensors: Dict[str, Array], metadata: Optional[Dict[str, str]] = None) -> bytes: 12 | """ 13 | Saves a dictionary of 
tensors into raw bytes in safetensors format. 14 | 15 | Args: 16 | tensors (`Dict[str, Array]`): 17 | The incoming tensors. Tensors need to be contiguous and dense. 18 | metadata (`Dict[str, str]`, *optional*, defaults to `None`): 19 | Optional text only metadata you might want to save in your header. 20 | For instance it can be useful to specify more about the underlying 21 | tensors. This is purely informative and does not affect tensor loading. 22 | 23 | Returns: 24 | `bytes`: The raw bytes representing the format 25 | 26 | Example: 27 | 28 | ```python 29 | from safetensors.flax import save 30 | from jax import numpy as jnp 31 | 32 | tensors = {"embedding": jnp.zeros((512, 1024)), "attention": jnp.zeros((256, 256))} 33 | byte_data = save(tensors) 34 | ``` 35 | """ 36 | np_tensors = _jnp2np(tensors) 37 | return numpy.save(np_tensors, metadata=metadata) 38 | 39 | 40 | def save_file( 41 | tensors: Dict[str, Array], 42 | filename: Union[str, os.PathLike], 43 | metadata: Optional[Dict[str, str]] = None, 44 | ) -> None: 45 | """ 46 | Saves a dictionary of tensors into raw bytes in safetensors format. 47 | 48 | Args: 49 | tensors (`Dict[str, Array]`): 50 | The incoming tensors. Tensors need to be contiguous and dense. 51 | filename (`str`, or `os.PathLike`)): 52 | The filename we're saving into. 53 | metadata (`Dict[str, str]`, *optional*, defaults to `None`): 54 | Optional text only metadata you might want to save in your header. 55 | For instance it can be useful to specify more about the underlying 56 | tensors. This is purely informative and does not affect tensor loading. 57 | 58 | Returns: 59 | `None` 60 | 61 | Example: 62 | 63 | ```python 64 | from safetensors.flax import save_file 65 | from jax import numpy as jnp 66 | 67 | tensors = {"embedding": jnp.zeros((512, 1024)), "attention": jnp.zeros((256, 256))} 68 | save_file(tensors, "model.safetensors") 69 | ``` 70 | """ 71 | np_tensors = _jnp2np(tensors) 72 | return numpy.save_file(np_tensors, filename, metadata=metadata) 73 | 74 | 75 | def load(data: bytes) -> Dict[str, Array]: 76 | """ 77 | Loads a safetensors file into flax format from pure bytes. 78 | 79 | Args: 80 | data (`bytes`): 81 | The content of a safetensors file 82 | 83 | Returns: 84 | `Dict[str, Array]`: dictionary that contains name as key, value as `Array` on cpu 85 | 86 | Example: 87 | 88 | ```python 89 | from safetensors.flax import load 90 | 91 | file_path = "./my_folder/bert.safetensors" 92 | with open(file_path, "rb") as f: 93 | data = f.read() 94 | 95 | loaded = load(data) 96 | ``` 97 | """ 98 | flat = numpy.load(data) 99 | return _np2jnp(flat) 100 | 101 | 102 | def load_file(filename: Union[str, os.PathLike]) -> Dict[str, Array]: 103 | """ 104 | Loads a safetensors file into flax format. 
105 | 106 | Args: 107 | filename (`str`, or `os.PathLike`)): 108 | The name of the file which contains the tensors 109 | 110 | Returns: 111 | `Dict[str, Array]`: dictionary that contains name as key, value as `Array` 112 | 113 | Example: 114 | 115 | ```python 116 | from safetensors.flax import load_file 117 | 118 | file_path = "./my_folder/bert.safetensors" 119 | loaded = load_file(file_path) 120 | ``` 121 | """ 122 | result = {} 123 | with safe_open(filename, framework="flax") as f: 124 | for k in f.keys(): 125 | result[k] = f.get_tensor(k) 126 | return result 127 | 128 | 129 | def _np2jnp(numpy_dict: Dict[str, np.ndarray]) -> Dict[str, Array]: 130 | for k, v in numpy_dict.items(): 131 | numpy_dict[k] = jnp.array(v) 132 | return numpy_dict 133 | 134 | 135 | def _jnp2np(jnp_dict: Dict[str, Array]) -> Dict[str, np.array]: 136 | for k, v in jnp_dict.items(): 137 | jnp_dict[k] = np.asarray(v) 138 | return jnp_dict 139 | -------------------------------------------------------------------------------- /BERT/safetensors/mlx.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict, Optional, Union 3 | 4 | import numpy as np 5 | 6 | import mlx.core as mx 7 | from safetensors import numpy, safe_open 8 | 9 | 10 | def save(tensors: Dict[str, mx.array], metadata: Optional[Dict[str, str]] = None) -> bytes: 11 | """ 12 | Saves a dictionary of tensors into raw bytes in safetensors format. 13 | 14 | Args: 15 | tensors (`Dict[str, mx.array]`): 16 | The incoming tensors. Tensors need to be contiguous and dense. 17 | metadata (`Dict[str, str]`, *optional*, defaults to `None`): 18 | Optional text only metadata you might want to save in your header. 19 | For instance it can be useful to specify more about the underlying 20 | tensors. This is purely informative and does not affect tensor loading. 21 | 22 | Returns: 23 | `bytes`: The raw bytes representing the format 24 | 25 | Example: 26 | 27 | ```python 28 | from safetensors.mlx import save 29 | import mlx.core as mx 30 | 31 | tensors = {"embedding": mx.zeros((512, 1024)), "attention": mx.zeros((256, 256))} 32 | byte_data = save(tensors) 33 | ``` 34 | """ 35 | np_tensors = _mx2np(tensors) 36 | return numpy.save(np_tensors, metadata=metadata) 37 | 38 | 39 | def save_file( 40 | tensors: Dict[str, mx.array], 41 | filename: Union[str, os.PathLike], 42 | metadata: Optional[Dict[str, str]] = None, 43 | ) -> None: 44 | """ 45 | Saves a dictionary of tensors into raw bytes in safetensors format. 46 | 47 | Args: 48 | tensors (`Dict[str, mx.array]`): 49 | The incoming tensors. Tensors need to be contiguous and dense. 50 | filename (`str`, or `os.PathLike`)): 51 | The filename we're saving into. 52 | metadata (`Dict[str, str]`, *optional*, defaults to `None`): 53 | Optional text only metadata you might want to save in your header. 54 | For instance it can be useful to specify more about the underlying 55 | tensors. This is purely informative and does not affect tensor loading. 56 | 57 | Returns: 58 | `None` 59 | 60 | Example: 61 | 62 | ```python 63 | from safetensors.mlx import save_file 64 | import mlx.core as mx 65 | 66 | tensors = {"embedding": mx.zeros((512, 1024)), "attention": mx.zeros((256, 256))} 67 | save_file(tensors, "model.safetensors") 68 | ``` 69 | """ 70 | np_tensors = _mx2np(tensors) 71 | return numpy.save_file(np_tensors, filename, metadata=metadata) 72 | 73 | 74 | def load(data: bytes) -> Dict[str, mx.array]: 75 | """ 76 | Loads a safetensors file into MLX format from pure bytes. 
77 | 78 | Args: 79 | data (`bytes`): 80 | The content of a safetensors file 81 | 82 | Returns: 83 | `Dict[str, mx.array]`: dictionary that contains name as key, value as `mx.array` 84 | 85 | Example: 86 | 87 | ```python 88 | from safetensors.mlx import load 89 | 90 | file_path = "./my_folder/bert.safetensors" 91 | with open(file_path, "rb") as f: 92 | data = f.read() 93 | 94 | loaded = load(data) 95 | ``` 96 | """ 97 | flat = numpy.load(data) 98 | return _np2mx(flat) 99 | 100 | 101 | def load_file(filename: Union[str, os.PathLike]) -> Dict[str, mx.array]: 102 | """ 103 | Loads a safetensors file into MLX format. 104 | 105 | Args: 106 | filename (`str`, or `os.PathLike`)): 107 | The name of the file which contains the tensors 108 | 109 | Returns: 110 | `Dict[str, mx.array]`: dictionary that contains name as key, value as `mx.array` 111 | 112 | Example: 113 | 114 | ```python 115 | from safetensors.mlx import load_file 116 | 117 | file_path = "./my_folder/bert.safetensors" 118 | loaded = load_file(file_path) 119 | ``` 120 | """ 121 | result = {} 122 | with safe_open(filename, framework="mlx") as f: 123 | for k in f.keys(): 124 | result[k] = f.get_tensor(k) 125 | return result 126 | 127 | 128 | def _np2mx(numpy_dict: Dict[str, np.ndarray]) -> Dict[str, mx.array]: 129 | for k, v in numpy_dict.items(): 130 | numpy_dict[k] = mx.array(v) 131 | return numpy_dict 132 | 133 | 134 | def _mx2np(mx_dict: Dict[str, mx.array]) -> Dict[str, np.array]: 135 | new_dict = {} 136 | for k, v in mx_dict.items(): 137 | new_dict[k] = np.asarray(v) 138 | return new_dict 139 | -------------------------------------------------------------------------------- /BERT/safetensors/numpy.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from typing import Dict, Optional, Union 4 | 5 | import numpy as np 6 | 7 | from safetensors import deserialize, safe_open, serialize, serialize_file 8 | 9 | 10 | def _tobytes(tensor: np.ndarray) -> bytes: 11 | if not _is_little_endian(tensor): 12 | tensor = tensor.byteswap(inplace=False) 13 | return tensor.tobytes() 14 | 15 | 16 | def save(tensor_dict: Dict[str, np.ndarray], metadata: Optional[Dict[str, str]] = None) -> bytes: 17 | """ 18 | Saves a dictionary of tensors into raw bytes in safetensors format. 19 | 20 | Args: 21 | tensor_dict (`Dict[str, np.ndarray]`): 22 | The incoming tensors. Tensors need to be contiguous and dense. 23 | metadata (`Dict[str, str]`, *optional*, defaults to `None`): 24 | Optional text only metadata you might want to save in your header. 25 | For instance it can be useful to specify more about the underlying 26 | tensors. This is purely informative and does not affect tensor loading. 27 | 28 | Returns: 29 | `bytes`: The raw bytes representing the format 30 | 31 | Example: 32 | 33 | ```python 34 | from safetensors.numpy import save 35 | import numpy as np 36 | 37 | tensors = {"a": np.zeros((2, 2)), "b": np.zeros((2, 3), dtype=np.uint8)} 38 | byte_data = save(tensors) 39 | ``` 40 | """ 41 | flattened = {k: {"dtype": v.dtype.name, "shape": v.shape, "data": _tobytes(v)} for k, v in tensor_dict.items()} 42 | serialized = serialize(flattened, metadata=metadata) 43 | result = bytes(serialized) 44 | return result 45 | 46 | 47 | def save_file( 48 | tensor_dict: Dict[str, np.ndarray], filename: Union[str, os.PathLike], metadata: Optional[Dict[str, str]] = None 49 | ) -> None: 50 | """ 51 | Saves a dictionary of tensors into raw bytes in safetensors format.
52 | 53 | Args: 54 | tensor_dict (`Dict[str, np.ndarray]`): 55 | The incoming tensors. Tensors need to be contiguous and dense. 56 | filename (`str`, or `os.PathLike`)): 57 | The filename we're saving into. 58 | metadata (`Dict[str, str]`, *optional*, defaults to `None`): 59 | Optional text only metadata you might want to save in your header. 60 | For instance it can be useful to specify more about the underlying 61 | tensors. This is purely informative and does not affect tensor loading. 62 | 63 | Returns: 64 | `None` 65 | 66 | Example: 67 | 68 | ```python 69 | from safetensors.numpy import save_file 70 | import numpy as np 71 | 72 | tensors = {"embedding": np.zeros((512, 1024)), "attention": np.zeros((256, 256))} 73 | save_file(tensors, "model.safetensors") 74 | ``` 75 | """ 76 | flattened = {k: {"dtype": v.dtype.name, "shape": v.shape, "data": _tobytes(v)} for k, v in tensor_dict.items()} 77 | serialize_file(flattened, filename, metadata=metadata) 78 | 79 | 80 | def load(data: bytes) -> Dict[str, np.ndarray]: 81 | """ 82 | Loads a safetensors file into numpy format from pure bytes. 83 | 84 | Args: 85 | data (`bytes`): 86 | The content of a safetensors file 87 | 88 | Returns: 89 | `Dict[str, np.ndarray]`: dictionary that contains name as key, value as `np.ndarray` on cpu 90 | 91 | Example: 92 | 93 | ```python 94 | from safetensors.numpy import load 95 | 96 | file_path = "./my_folder/bert.safetensors" 97 | with open(file_path, "rb") as f: 98 | data = f.read() 99 | 100 | loaded = load(data) 101 | ``` 102 | """ 103 | flat = deserialize(data) 104 | return _view2np(flat) 105 | 106 | 107 | def load_file(filename: Union[str, os.PathLike]) -> Dict[str, np.ndarray]: 108 | """ 109 | Loads a safetensors file into numpy format. 110 | 111 | Args: 112 | filename (`str`, or `os.PathLike`)): 113 | The name of the file which contains the tensors 114 | 115 | Returns: 116 | `Dict[str, np.ndarray]`: dictionary that contains name as key, value as `np.ndarray` 117 | 118 | Example: 119 | 120 | ```python 121 | from safetensors.numpy import load_file 122 | 123 | file_path = "./my_folder/bert.safetensors" 124 | loaded = load_file(file_path) 125 | ``` 126 | """ 127 | result = {} 128 | with safe_open(filename, framework="np") as f: 129 | for k in f.keys(): 130 | result[k] = f.get_tensor(k) 131 | return result 132 | 133 | 134 | _TYPES = { 135 | "F64": np.float64, 136 | "F32": np.float32, 137 | "F16": np.float16, 138 | "I64": np.int64, 139 | "U64": np.uint64, 140 | "I32": np.int32, 141 | "U32": np.uint32, 142 | "I16": np.int16, 143 | "U16": np.uint16, 144 | "I8": np.int8, 145 | "U8": np.uint8, 146 | "BOOL": bool, 147 | } 148 | 149 | 150 | def _getdtype(dtype_str: str) -> np.dtype: 151 | return _TYPES[dtype_str] 152 | 153 | 154 | def _view2np(safeview) -> Dict[str, np.ndarray]: 155 | result = {} 156 | for k, v in safeview: 157 | dtype = _getdtype(v["dtype"]) 158 | arr = np.frombuffer(v["data"], dtype=dtype).reshape(v["shape"]) 159 | result[k] = arr 160 | return result 161 | 162 | 163 | def _is_little_endian(tensor: np.ndarray) -> bool: 164 | byteorder = tensor.dtype.byteorder 165 | if byteorder == "=": 166 | if sys.byteorder == "little": 167 | return True 168 | else: 169 | return False 170 | elif byteorder == "|": 171 | return True 172 | elif byteorder == "<": 173 | return True 174 | elif byteorder == ">": 175 | return False 176 | raise ValueError(f"Unexpected byte order {byteorder}") 177 | -------------------------------------------------------------------------------- /BERT/safetensors/paddle.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict, Optional, Union 3 | 4 | import numpy as np 5 | 6 | import paddle 7 | from safetensors import numpy 8 | 9 | 10 | def save(tensors: Dict[str, paddle.Tensor], metadata: Optional[Dict[str, str]] = None) -> bytes: 11 | """ 12 | Saves a dictionary of tensors into raw bytes in safetensors format. 13 | 14 | Args: 15 | tensors (`Dict[str, paddle.Tensor]`): 16 | The incoming tensors. Tensors need to be contiguous and dense. 17 | metadata (`Dict[str, str]`, *optional*, defaults to `None`): 18 | Optional text only metadata you might want to save in your header. 19 | For instance it can be useful to specify more about the underlying 20 | tensors. This is purely informative and does not affect tensor loading. 21 | 22 | Returns: 23 | `bytes`: The raw bytes representing the format 24 | 25 | Example: 26 | 27 | ```python 28 | from safetensors.paddle import save 29 | import paddle 30 | 31 | tensors = {"embedding": paddle.zeros((512, 1024)), "attention": paddle.zeros((256, 256))} 32 | byte_data = save(tensors) 33 | ``` 34 | """ 35 | np_tensors = _paddle2np(tensors) 36 | return numpy.save(np_tensors, metadata=metadata) 37 | 38 | 39 | def save_file( 40 | tensors: Dict[str, paddle.Tensor], 41 | filename: Union[str, os.PathLike], 42 | metadata: Optional[Dict[str, str]] = None, 43 | ) -> None: 44 | """ 45 | Saves a dictionary of tensors into raw bytes in safetensors format. 46 | 47 | Args: 48 | tensors (`Dict[str, paddle.Tensor]`): 49 | The incoming tensors. Tensors need to be contiguous and dense. 50 | filename (`str`, or `os.PathLike`)): 51 | The filename we're saving into. 52 | metadata (`Dict[str, str]`, *optional*, defaults to `None`): 53 | Optional text only metadata you might want to save in your header. 54 | For instance it can be useful to specify more about the underlying 55 | tensors. This is purely informative and does not affect tensor loading. 56 | 57 | Returns: 58 | `None` 59 | 60 | Example: 61 | 62 | ```python 63 | from safetensors.paddle import save_file 64 | import paddle 65 | 66 | tensors = {"embedding": paddle.zeros((512, 1024)), "attention": paddle.zeros((256, 256))} 67 | save_file(tensors, "model.safetensors") 68 | ``` 69 | """ 70 | np_tensors = _paddle2np(tensors) 71 | return numpy.save_file(np_tensors, filename, metadata=metadata) 72 | 73 | 74 | def load(data: bytes, device: str = "cpu") -> Dict[str, paddle.Tensor]: 75 | """ 76 | Loads a safetensors file into paddle format from pure bytes. 77 | 78 | Args: 79 | data (`bytes`): 80 | The content of a safetensors file 81 | 82 | Returns: 83 | `Dict[str, paddle.Tensor]`: dictionary that contains name as key, value as `paddle.Tensor` on cpu 84 | 85 | Example: 86 | 87 | ```python 88 | from safetensors.paddle import load 89 | 90 | file_path = "./my_folder/bert.safetensors" 91 | with open(file_path, "rb") as f: 92 | data = f.read() 93 | 94 | loaded = load(data) 95 | ``` 96 | """ 97 | flat = numpy.load(data) 98 | return _np2paddle(flat, device) 99 | 100 | 101 | def load_file(filename: Union[str, os.PathLike], device="cpu") -> Dict[str, paddle.Tensor]: 102 | """ 103 | Loads a safetensors file into paddle format. 104 | 105 | Args: 106 | filename (`str`, or `os.PathLike`)): 107 | The name of the file which contains the tensors 108 | device (`str`, *optional*, defaults to `cpu`): 109 | The device where the tensors need to be located after load.
110 | available options are all regular paddle device locations 111 | 112 | Returns: 113 | `Dict[str, paddle.Tensor]`: dictionary that contains name as key, value as `paddle.Tensor` 114 | 115 | Example: 116 | 117 | ```python 118 | from safetensors.paddle import load_file 119 | 120 | file_path = "./my_folder/bert.safetensors" 121 | loaded = load_file(file_path) 122 | ``` 123 | """ 124 | flat = numpy.load_file(filename) 125 | output = _np2paddle(flat, device) 126 | return output 127 | 128 | 129 | def _np2paddle(numpy_dict: Dict[str, np.ndarray], device: str = "cpu") -> Dict[str, paddle.Tensor]: 130 | for k, v in numpy_dict.items(): 131 | numpy_dict[k] = paddle.to_tensor(v, place=device) 132 | return numpy_dict 133 | 134 | 135 | def _paddle2np(paddle_dict: Dict[str, paddle.Tensor]) -> Dict[str, np.array]: 136 | for k, v in paddle_dict.items(): 137 | paddle_dict[k] = v.detach().cpu().numpy() 138 | return paddle_dict 139 | -------------------------------------------------------------------------------- /BERT/safetensors/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Talented0Dev/LLM_Project/1bb8195f0705c52be24fa9b77007d3d01989d404/BERT/safetensors/py.typed -------------------------------------------------------------------------------- /BERT/safetensors/tensorflow.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict, Optional, Union 3 | 4 | import numpy as np 5 | import tensorflow as tf 6 | 7 | from safetensors import numpy, safe_open 8 | 9 | 10 | def save(tensors: Dict[str, tf.Tensor], metadata: Optional[Dict[str, str]] = None) -> bytes: 11 | """ 12 | Saves a dictionary of tensors into raw bytes in safetensors format. 13 | 14 | Args: 15 | tensors (`Dict[str, tf.Tensor]`): 16 | The incoming tensors. Tensors need to be contiguous and dense. 17 | metadata (`Dict[str, str]`, *optional*, defaults to `None`): 18 | Optional text only metadata you might want to save in your header. 19 | For instance it can be useful to specify more about the underlying 20 | tensors. This is purely informative and does not affect tensor loading. 21 | 22 | Returns: 23 | `bytes`: The raw bytes representing the format 24 | 25 | Example: 26 | 27 | ```python 28 | from safetensors.tensorflow import save 29 | import tensorflow as tf 30 | 31 | tensors = {"embedding": tf.zeros((512, 1024)), "attention": tf.zeros((256, 256))} 32 | byte_data = save(tensors) 33 | ``` 34 | """ 35 | np_tensors = _tf2np(tensors) 36 | return numpy.save(np_tensors, metadata=metadata) 37 | 38 | 39 | def save_file( 40 | tensors: Dict[str, tf.Tensor], 41 | filename: Union[str, os.PathLike], 42 | metadata: Optional[Dict[str, str]] = None, 43 | ) -> None: 44 | """ 45 | Saves a dictionary of tensors into raw bytes in safetensors format. 46 | 47 | Args: 48 | tensors (`Dict[str, tf.Tensor]`): 49 | The incoming tensors. Tensors need to be contiguous and dense. 50 | filename (`str`, or `os.PathLike`)): 51 | The filename we're saving into. 52 | metadata (`Dict[str, str]`, *optional*, defaults to `None`): 53 | Optional text only metadata you might want to save in your header. 54 | For instance it can be useful to specify more about the underlying 55 | tensors. This is purely informative and does not affect tensor loading. 
56 | 57 | Returns: 58 | `None` 59 | 60 | Example: 61 | 62 | ```python 63 | from safetensors.tensorflow import save_file 64 | import tensorflow as tf 65 | 66 | tensors = {"embedding": tf.zeros((512, 1024)), "attention": tf.zeros((256, 256))} 67 | save_file(tensors, "model.safetensors") 68 | ``` 69 | """ 70 | np_tensors = _tf2np(tensors) 71 | return numpy.save_file(np_tensors, filename, metadata=metadata) 72 | 73 | 74 | def load(data: bytes) -> Dict[str, tf.Tensor]: 75 | """ 76 | Loads a safetensors file into tensorflow format from pure bytes. 77 | 78 | Args: 79 | data (`bytes`): 80 | The content of a safetensors file 81 | 82 | Returns: 83 | `Dict[str, tf.Tensor]`: dictionary that contains name as key, value as `tf.Tensor` on cpu 84 | 85 | Example: 86 | 87 | ```python 88 | from safetensors.tensorflow import load 89 | 90 | file_path = "./my_folder/bert.safetensors" 91 | with open(file_path, "rb") as f: 92 | data = f.read() 93 | 94 | loaded = load(data) 95 | ``` 96 | """ 97 | flat = numpy.load(data) 98 | return _np2tf(flat) 99 | 100 | 101 | def load_file(filename: Union[str, os.PathLike]) -> Dict[str, tf.Tensor]: 102 | """ 103 | Loads a safetensors file into tensorflow format. 104 | 105 | Args: 106 | filename (`str`, or `os.PathLike`)): 107 | The name of the file which contains the tensors 108 | 109 | Returns: 110 | `Dict[str, tf.Tensor]`: dictionary that contains name as key, value as `tf.Tensor` 111 | 112 | Example: 113 | 114 | ```python 115 | from safetensors.tensorflow import load_file 116 | 117 | file_path = "./my_folder/bert.safetensors" 118 | loaded = load_file(file_path) 119 | ``` 120 | """ 121 | result = {} 122 | with safe_open(filename, framework="tf") as f: 123 | for k in f.keys(): 124 | result[k] = f.get_tensor(k) 125 | return result 126 | 127 | 128 | def _np2tf(numpy_dict: Dict[str, np.ndarray]) -> Dict[str, tf.Tensor]: 129 | for k, v in numpy_dict.items(): 130 | numpy_dict[k] = tf.convert_to_tensor(v) 131 | return numpy_dict 132 | 133 | 134 | def _tf2np(tf_dict: Dict[str, tf.Tensor]) -> Dict[str, np.array]: 135 | for k, v in tf_dict.items(): 136 | tf_dict[k] = v.numpy() 137 | return tf_dict 138 | -------------------------------------------------------------------------------- /BERT/safetensors/torch.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from collections import defaultdict 4 | from typing import Any, Dict, List, Optional, Set, Tuple, Union 5 | 6 | import torch 7 | 8 | from safetensors import deserialize, safe_open, serialize, serialize_file 9 | 10 | 11 | def storage_ptr(tensor: torch.Tensor) -> int: 12 | try: 13 | return tensor.untyped_storage().data_ptr() 14 | except Exception: 15 | # Fallback for torch==1.10 16 | try: 17 | return tensor.storage().data_ptr() 18 | except NotImplementedError: 19 | # Fallback for meta storage 20 | return 0 21 | 22 | 23 | def _end_ptr(tensor: torch.Tensor) -> int: 24 | if tensor.nelement(): 25 | stop = tensor.view(-1)[-1].data_ptr() + _SIZE[tensor.dtype] 26 | else: 27 | stop = tensor.data_ptr() 28 | return stop 29 | 30 | 31 | def storage_size(tensor: torch.Tensor) -> int: 32 | try: 33 | return tensor.untyped_storage().nbytes() 34 | except AttributeError: 35 | # Fallback for torch==1.10 36 | try: 37 | return tensor.storage().size() * _SIZE[tensor.dtype] 38 | except NotImplementedError: 39 | # Fallback for meta storage 40 | # On torch >=2.0 this is the tensor size 41 | return tensor.nelement() * _SIZE[tensor.dtype] 42 | 43 | 44 | def _filter_shared_not_shared(tensors: 
List[Set[str]], state_dict: Dict[str, torch.Tensor]) -> List[Set[str]]: 45 | filtered_tensors = [] 46 | for shared in tensors: 47 | if len(shared) < 2: 48 | filtered_tensors.append(shared) 49 | continue 50 | 51 | areas = [] 52 | for name in shared: 53 | tensor = state_dict[name] 54 | areas.append((tensor.data_ptr(), _end_ptr(tensor), name)) 55 | areas.sort() 56 | 57 | _, last_stop, last_name = areas[0] 58 | filtered_tensors.append({last_name}) 59 | for start, stop, name in areas[1:]: 60 | if start >= last_stop: 61 | filtered_tensors.append({name}) 62 | else: 63 | filtered_tensors[-1].add(name) 64 | last_stop = stop 65 | 66 | return filtered_tensors 67 | 68 | 69 | def _find_shared_tensors(state_dict: Dict[str, torch.Tensor]) -> List[Set[str]]: 70 | tensors = defaultdict(set) 71 | for k, v in state_dict.items(): 72 | if v.device != torch.device("meta") and storage_ptr(v) != 0 and storage_size(v) != 0: 73 | # Need to add device as key because of multiple GPU. 74 | tensors[(v.device, storage_ptr(v), storage_size(v))].add(k) 75 | tensors = list(sorted(tensors.values())) 76 | tensors = _filter_shared_not_shared(tensors, state_dict) 77 | return tensors 78 | 79 | 80 | def _is_complete(tensor: torch.Tensor) -> bool: 81 | return tensor.data_ptr() == storage_ptr(tensor) and tensor.nelement() * _SIZE[tensor.dtype] == storage_size(tensor) 82 | 83 | 84 | def _remove_duplicate_names( 85 | state_dict: Dict[str, torch.Tensor], 86 | *, 87 | preferred_names: Optional[List[str]] = None, 88 | discard_names: Optional[List[str]] = None, 89 | ) -> Dict[str, List[str]]: 90 | if preferred_names is None: 91 | preferred_names = [] 92 | preferred_names = set(preferred_names) 93 | if discard_names is None: 94 | discard_names = [] 95 | discard_names = set(discard_names) 96 | 97 | shareds = _find_shared_tensors(state_dict) 98 | to_remove = defaultdict(list) 99 | for shared in shareds: 100 | complete_names = set([name for name in shared if _is_complete(state_dict[name])]) 101 | if not complete_names: 102 | raise RuntimeError( 103 | "Error while trying to find names to remove to save state dict, but found no suitable name to keep" 104 | f" for saving amongst: {shared}. None is covering the entire storage.Refusing to save/load the model" 105 | " since you could be storing much more memory than needed. Please refer to" 106 | " https://huggingface.co/docs/safetensors/torch_shared_tensors for more information. Or open an" 107 | " issue." 108 | ) 109 | 110 | keep_name = sorted(list(complete_names))[0] 111 | 112 | # Mechanism to preferentially select keys to keep 113 | # coming from the on-disk file to allow 114 | # loading models saved with a different choice 115 | # of keep_name 116 | preferred = complete_names.difference(discard_names) 117 | if preferred: 118 | keep_name = sorted(list(preferred))[0] 119 | 120 | if preferred_names: 121 | preferred = preferred_names.intersection(complete_names) 122 | if preferred: 123 | keep_name = sorted(list(preferred))[0] 124 | for name in sorted(shared): 125 | if name != keep_name: 126 | to_remove[keep_name].append(name) 127 | return to_remove 128 | 129 | 130 | def save_model( 131 | model: torch.nn.Module, filename: str, metadata: Optional[Dict[str, str]] = None, force_contiguous: bool = True 132 | ): 133 | """ 134 | Saves a given torch model to specified filename. 135 | This method exists specifically to avoid tensor sharing issues which are 136 | not allowed in `safetensors`. 
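Example (a minimal sketch; `model` stands in for any `torch.nn.Module` instance):

```python
from safetensors.torch import save_model

save_model(model, "model.safetensors")
```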
[More information on tensor sharing](../torch_shared_tensors) 137 | 138 | Args: 139 | model (`torch.nn.Module`): 140 | The model to save on disk. 141 | filename (`str`): 142 | The filename location to save the file 143 | metadata (`Dict[str, str]`, *optional*): 144 | Extra information to save along with the file. 145 | Some metadata will be added for each dropped tensors. 146 | This information will not be enough to recover the entire 147 | shared structure but might help understanding things 148 | force_contiguous (`boolean`, *optional*, defaults to True): 149 | Forcing the state_dict to be saved as contiguous tensors. 150 | This has no effect on the correctness of the model, but it 151 | could potentially change performance if the layout of the tensor 152 | was chosen specifically for that reason. 153 | """ 154 | state_dict = model.state_dict() 155 | to_removes = _remove_duplicate_names(state_dict) 156 | 157 | for kept_name, to_remove_group in to_removes.items(): 158 | for to_remove in to_remove_group: 159 | if metadata is None: 160 | metadata = {} 161 | 162 | if to_remove not in metadata: 163 | # Do not override user data 164 | metadata[to_remove] = kept_name 165 | del state_dict[to_remove] 166 | if force_contiguous: 167 | state_dict = {k: v.contiguous() for k, v in state_dict.items()} 168 | try: 169 | save_file(state_dict, filename, metadata=metadata) 170 | except ValueError as e: 171 | msg = str(e) 172 | msg += " Or use save_model(..., force_contiguous=True), read the docs for potential caveats." 173 | raise ValueError(msg) 174 | 175 | 176 | def load_model(model: torch.nn.Module, filename: Union[str, os.PathLike], strict=True) -> Tuple[List[str], List[str]]: 177 | """ 178 | Loads a given filename onto a torch model. 179 | This method exists specifically to avoid tensor sharing issues which are 180 | not allowed in `safetensors`. [More information on tensor sharing](../torch_shared_tensors) 181 | 182 | Args: 183 | model (`torch.nn.Module`): 184 | The model to load onto. 185 | filename (`str`, or `os.PathLike`): 186 | The filename location to load the file from. 187 | strict (`bool`, *optional*, defaults to True): 188 | Wether to fail if you're missing keys or having unexpected ones 189 | When false, the function simply returns missing and unexpected names. 190 | 191 | Returns: 192 | `(missing, unexpected): (List[str], List[str])` 193 | `missing` are names in the model which were not modified during loading 194 | `unexpected` are names that are on the file, but weren't used during 195 | the load. 
196 | """ 197 | state_dict = load_file(filename) 198 | model_state_dict = model.state_dict() 199 | to_removes = _remove_duplicate_names(model_state_dict, preferred_names=state_dict.keys()) 200 | missing, unexpected = model.load_state_dict(state_dict, strict=False) 201 | missing = set(missing) 202 | for to_remove_group in to_removes.values(): 203 | for to_remove in to_remove_group: 204 | if to_remove not in missing: 205 | unexpected.append(to_remove) 206 | else: 207 | missing.remove(to_remove) 208 | if strict and (missing or unexpected): 209 | missing_keys = ", ".join([f'"{k}"' for k in sorted(missing)]) 210 | unexpected_keys = ", ".join([f'"{k}"' for k in sorted(unexpected)]) 211 | error = f"Error(s) in loading state_dict for {model.__class__.__name__}:" 212 | if missing: 213 | error += f"\n Missing key(s) in state_dict: {missing_keys}" 214 | if unexpected: 215 | error += f"\n Unexpected key(s) in state_dict: {unexpected_keys}" 216 | raise RuntimeError(error) 217 | return missing, unexpected 218 | 219 | 220 | def save(tensors: Dict[str, torch.Tensor], metadata: Optional[Dict[str, str]] = None) -> bytes: 221 | """ 222 | Saves a dictionary of tensors into raw bytes in safetensors format. 223 | 224 | Args: 225 | tensors (`Dict[str, torch.Tensor]`): 226 | The incoming tensors. Tensors need to be contiguous and dense. 227 | metadata (`Dict[str, str]`, *optional*, defaults to `None`): 228 | Optional text only metadata you might want to save in your header. 229 | For instance it can be useful to specify more about the underlying 230 | tensors. This is purely informative and does not affect tensor loading. 231 | 232 | Returns: 233 | `bytes`: The raw bytes representing the format 234 | 235 | Example: 236 | 237 | ```python 238 | from safetensors.torch import save 239 | import torch 240 | 241 | tensors = {"embedding": torch.zeros((512, 1024)), "attention": torch.zeros((256, 256))} 242 | byte_data = save(tensors) 243 | ``` 244 | """ 245 | serialized = serialize(_flatten(tensors), metadata=metadata) 246 | result = bytes(serialized) 247 | return result 248 | 249 | 250 | def save_file( 251 | tensors: Dict[str, torch.Tensor], 252 | filename: Union[str, os.PathLike], 253 | metadata: Optional[Dict[str, str]] = None, 254 | ): 255 | """ 256 | Saves a dictionary of tensors into raw bytes in safetensors format. 257 | 258 | Args: 259 | tensors (`Dict[str, torch.Tensor]`): 260 | The incoming tensors. Tensors need to be contiguous and dense. 261 | filename (`str`, or `os.PathLike`)): 262 | The filename we're saving into. 263 | metadata (`Dict[str, str]`, *optional*, defaults to `None`): 264 | Optional text only metadata you might want to save in your header. 265 | For instance it can be useful to specify more about the underlying 266 | tensors. This is purely informative and does not affect tensor loading. 267 | 268 | Returns: 269 | `None` 270 | 271 | Example: 272 | 273 | ```python 274 | from safetensors.torch import save_file 275 | import torch 276 | 277 | tensors = {"embedding": torch.zeros((512, 1024)), "attention": torch.zeros((256, 256))} 278 | save_file(tensors, "model.safetensors") 279 | ``` 280 | """ 281 | serialize_file(_flatten(tensors), filename, metadata=metadata) 282 | 283 | 284 | def load_file(filename: Union[str, os.PathLike], device="cpu") -> Dict[str, torch.Tensor]: 285 | """ 286 | Loads a safetensors file into torch format. 
287 | 288 | Args: 289 | filename (`str`, or `os.PathLike`): 290 | The name of the file which contains the tensors 291 | device (`str`, *optional*, defaults to `cpu`): 292 | The device where the tensors need to be located after load. 293 | available options are all regular torch device locations 294 | 295 | Returns: 296 | `Dict[str, torch.Tensor]`: dictionary that contains name as key, value as `torch.Tensor` 297 | 298 | Example: 299 | 300 | ```python 301 | from safetensors.torch import load_file 302 | 303 | file_path = "./my_folder/bert.safetensors" 304 | loaded = load_file(file_path) 305 | ``` 306 | """ 307 | result = {} 308 | with safe_open(filename, framework="pt", device=device) as f: 309 | for k in f.keys(): 310 | result[k] = f.get_tensor(k) 311 | return result 312 | 313 | 314 | def load(data: bytes) -> Dict[str, torch.Tensor]: 315 | """ 316 | Loads a safetensors file into torch format from pure bytes. 317 | 318 | Args: 319 | data (`bytes`): 320 | The content of a safetensors file 321 | 322 | Returns: 323 | `Dict[str, torch.Tensor]`: dictionary that contains name as key, value as `torch.Tensor` on cpu 324 | 325 | Example: 326 | 327 | ```python 328 | from safetensors.torch import load 329 | 330 | file_path = "./my_folder/bert.safetensors" 331 | with open(file_path, "rb") as f: 332 | data = f.read() 333 | 334 | loaded = load(data) 335 | ``` 336 | """ 337 | flat = deserialize(data) 338 | return _view2torch(flat) 339 | 340 | # torch.float8 formats require 2.1; we do not support these dtypes on earlier versions 341 | _float8_e4m3fn = getattr(torch, "float8_e4m3fn", None) 342 | _float8_e5m2 = getattr(torch, "float8_e5m2", None) 343 | 344 | _SIZE = { 345 | torch.int64: 8, 346 | torch.float32: 4, 347 | torch.int32: 4, 348 | torch.bfloat16: 2, 349 | torch.float16: 2, 350 | torch.int16: 2, 351 | torch.uint8: 1, 352 | torch.int8: 1, 353 | torch.bool: 1, 354 | torch.float64: 8, 355 | _float8_e4m3fn: 1, 356 | _float8_e5m2: 1, 357 | } 358 | 359 | _TYPES = { 360 | "F64": torch.float64, 361 | "F32": torch.float32, 362 | "F16": torch.float16, 363 | "BF16": torch.bfloat16, 364 | "I64": torch.int64, 365 | # "U64": torch.uint64, 366 | "I32": torch.int32, 367 | # "U32": torch.uint32, 368 | "I16": torch.int16, 369 | # "U16": torch.uint16, 370 | "I8": torch.int8, 371 | "U8": torch.uint8, 372 | "BOOL": torch.bool, 373 | "F8_E4M3": _float8_e4m3fn, 374 | "F8_E5M2": _float8_e5m2, 375 | } 376 | 377 | 378 | def _getdtype(dtype_str: str) -> torch.dtype: 379 | return _TYPES[dtype_str] 380 | 381 | 382 | def _view2torch(safeview) -> Dict[str, torch.Tensor]: 383 | result = {} 384 | for k, v in safeview: 385 | dtype = _getdtype(v["dtype"]) 386 | arr = torch.frombuffer(v["data"], dtype=dtype).reshape(v["shape"]) 387 | if sys.byteorder == "big": 388 | arr = torch.from_numpy(arr.numpy().byteswap(inplace=False)) 389 | result[k] = arr 390 | 391 | return result 392 | 393 | 394 | def _tobytes(tensor: torch.Tensor, name: str) -> bytes: 395 | if tensor.layout != torch.strided: 396 | raise ValueError( 397 | f"You are trying to save a sparse tensor: `{name}` which this library does not support." 398 | " You can make it a dense tensor before saving with `.to_dense()` but be aware this might" 399 | " make a much larger file than needed." 400 | ) 401 | 402 | if not tensor.is_contiguous(): 403 | raise ValueError( 404 | f"You are trying to save a non contiguous tensor: `{name}` which is not allowed.
It either means you" 405 | " are trying to save tensors which are reference of each other in which case it's recommended to save" 406 | " only the full tensors, and reslice at load time, or simply call `.contiguous()` on your tensor to" 407 | " pack it before saving." 408 | ) 409 | if tensor.device.type != "cpu": 410 | # Moving tensor to cpu before saving 411 | tensor = tensor.to("cpu") 412 | 413 | import ctypes 414 | 415 | import numpy as np 416 | 417 | # When shape is empty (scalar), np.prod returns a float 418 | # we need a int for the following calculations 419 | length = int(np.prod(tensor.shape).item()) 420 | bytes_per_item = _SIZE[tensor.dtype] 421 | 422 | total_bytes = length * bytes_per_item 423 | 424 | ptr = tensor.data_ptr() 425 | if ptr == 0: 426 | return b"" 427 | newptr = ctypes.cast(ptr, ctypes.POINTER(ctypes.c_ubyte)) 428 | data = np.ctypeslib.as_array(newptr, (total_bytes,)) # no internal copy 429 | if sys.byteorder == "big": 430 | NPDTYPES = { 431 | torch.int64: np.int64, 432 | torch.float32: np.float32, 433 | torch.int32: np.int32, 434 | # XXX: This is ok because both have the same width 435 | torch.bfloat16: np.float16, 436 | torch.float16: np.float16, 437 | torch.int16: np.int16, 438 | torch.uint8: np.uint8, 439 | torch.int8: np.int8, 440 | torch.bool: bool, 441 | torch.float64: np.float64, 442 | # XXX: This is ok because both have the same width and byteswap is a no-op anyway 443 | _float8_e4m3fn: np.uint8, 444 | _float8_e5m2: np.uint8, 445 | } 446 | npdtype = NPDTYPES[tensor.dtype] 447 | # Not in place as that would potentially modify a live running model 448 | data = data.view(npdtype).byteswap(inplace=False) 449 | return data.tobytes() 450 | 451 | 452 | def _flatten(tensors: Dict[str, torch.Tensor]) -> Dict[str, Dict[str, Any]]: 453 | if not isinstance(tensors, dict): 454 | raise ValueError(f"Expected a dict of [str, torch.Tensor] but received {type(tensors)}") 455 | 456 | invalid_tensors = [] 457 | for k, v in tensors.items(): 458 | if not isinstance(v, torch.Tensor): 459 | raise ValueError(f"Key `{k}` is invalid, expected torch.Tensor but received {type(v)}") 460 | 461 | if v.layout != torch.strided: 462 | invalid_tensors.append(k) 463 | if invalid_tensors: 464 | raise ValueError( 465 | f"You are trying to save a sparse tensors: `{invalid_tensors}` which this library does not support." 466 | " You can make it a dense tensor before saving with `.to_dense()` but be aware this might" 467 | " make a much larger file than needed." 468 | ) 469 | 470 | shared_pointers = _find_shared_tensors(tensors) 471 | failing = [] 472 | for names in shared_pointers: 473 | if len(names) > 1: 474 | failing.append(names) 475 | 476 | if failing: 477 | raise RuntimeError( 478 | f""" 479 | Some tensors share memory, this will lead to duplicate memory on disk and potential differences when loading them again: {failing}. 480 | A potential way to correctly save your model is to use `save_model`. 
481 | More information at https://huggingface.co/docs/safetensors/torch_shared_tensors 482 | """ 483 | ) 484 | 485 | return { 486 | k: { 487 | "dtype": str(v.dtype).split(".")[-1], 488 | "shape": v.shape, 489 | "data": _tobytes(v, k), 490 | } 491 | for k, v in tensors.items() 492 | } 493 | -------------------------------------------------------------------------------- /Newbook.py: -------------------------------------------------------------------------------- 1 | import tkinter as tk 2 | from tkinter import ttk 3 | from tkinter import scrolledtext 4 | from transformers import GPT2LMHeadModel, GPT2Tokenizer 5 | 6 | 7 | 8 | class NovelGeneratorApp(tk.Tk): 9 | def __init__(self): 10 | super().__init__() 11 | self.title("Novel Generator") 12 | self.geometry("800x600") 13 | 14 | self.create_widgets() 15 | 16 | def create_widgets(self): 17 | self.theme_label = ttk.Label(self, text="Theme:") 18 | self.theme_label.grid(row=0, column=0, sticky="w") 19 | self.theme_entry = ttk.Entry(self, width=50) 20 | self.theme_entry.grid(row=0, column=1, padx=5, pady=5) 21 | 22 | self.synopsis_label = ttk.Label(self, text="Synopsis:") 23 | self.synopsis_label.grid(row=1, column=0, sticky="w") 24 | self.synopsis_text = scrolledtext.ScrolledText(self, width=50, height=5) 25 | self.synopsis_text.grid(row=1, column=1, padx=5, pady=5) 26 | 27 | self.words_label = ttk.Label(self, text="Number of Words:") 28 | self.words_label.grid(row=2, column=0, sticky="w") 29 | self.words_entry = ttk.Entry(self) 30 | self.words_entry.grid(row=2, column=1, padx=5, pady=5) 31 | 32 | self.chapters_label = ttk.Label(self, text="Number of Chapters:") 33 | self.chapters_label.grid(row=3, column=0, sticky="w") 34 | self.chapters_entry = ttk.Entry(self) 35 | self.chapters_entry.grid(row=3, column=1, padx=5, pady=5) 36 | 37 | self.generate_button = ttk.Button(self, text="Generate Novel", command=self.generate_novel) 38 | self.generate_button.grid(row=4, column=0, columnspan=2, pady=10) 39 | 40 | self.generated_novel_label = ttk.Label(self, text="Generated Novel:") 41 | self.generated_novel_label.grid(row=5, column=0, sticky="w") 42 | self.generated_novel_text = scrolledtext.ScrolledText(self, width=80, height=20) 43 | self.generated_novel_text.grid(row=6, column=0, columnspan=2, padx=5, pady=5) 44 | 45 | def generate_novel(self): 46 | theme = self.theme_entry.get() 47 | synopsis = self.synopsis_text.get("1.0", tk.END) 48 | num_words = int(self.words_entry.get()) 49 | num_chapters = int(self.chapters_entry.get()) 50 | 51 | novel_content = self.generate_novel_content(theme, synopsis, num_words, num_chapters) 52 | 53 | self.generated_novel_text.delete("1.0", tk.END) 54 | self.generated_novel_text.insert(tk.END, novel_content) 55 | 56 | def generate_novel_content(self, theme, synopsis, num_words, num_chapters): 57 | novel_content = f"Theme: {theme}\nSynopsis: {synopsis}\n\n" 58 | tokenizer = GPT2Tokenizer.from_pretrained("gpt2") 59 | model = GPT2LMHeadModel.from_pretrained("gpt2") 60 | for chapter in range(1, num_chapters + 1): 61 | novel_content += f"Chapter {chapter}:\n" 62 | chapter_text = self.generate_chapter_content(theme, synopsis, num_words, chapter, tokenizer, model) 63 | novel_content += chapter_text + "\n\n" 64 | return novel_content 65 | 66 | def generate_chapter_content(self, theme, synopsis, num_words, chapter, tokenizer, model): 67 | input_text = f"{theme}. {synopsis}. Chapter {chapter}. " # vary the prompt per chapter so chapters do not come out identical
" 68 | input_ids = tokenizer.encode(input_text, return_tensors="pt") 69 | max_length = num_words + len(input_ids[0]) 70 | output = model.generate(input_ids, max_length=max_length, num_return_sequences=1, pad_token_id=tokenizer.eos_token_id) 71 | chapter_text = tokenizer.decode(output[0], skip_special_tokens=True) 72 | return chapter_text 73 | 74 | if __name__ == "__main__": 75 | app = NovelGeneratorApp() 76 | app.mainloop() 77 | -------------------------------------------------------------------------------- /Novel generator.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Talented0Dev/LLM_Project/1bb8195f0705c52be24fa9b77007d3d01989d404/Novel generator.mp4 -------------------------------------------------------------------------------- /bookgenerator.py: -------------------------------------------------------------------------------- 1 | import tkinter as tk 2 | from tkinter import ttk 3 | from tkinter import scrolledtext 4 | from tkinter import messagebox 5 | import ebooklib 6 | from ebooklib import epub 7 | from transformers import GPT2LMHeadModel, GPT2Tokenizer 8 | # https://www.upwork.com/ab/proposals/job/~01cbcbb64654457e34/apply/ 9 | # pip install torch torchvision torchaudio 10 | class NovelGeneratorApp(tk.Tk): 11 | def __init__(self): 12 | super().__init__() 13 | self.title("Novel Generator") 14 | self.geometry("800x600") 15 | 16 | self.create_widgets() 17 | 18 | def create_widgets(self): 19 | # Theme and Synopsis 20 | self.theme_label = ttk.Label(self, text="Theme:") 21 | self.theme_label.grid(row=0, column=0, sticky="w") 22 | self.theme_entry = ttk.Entry(self, width=50) 23 | self.theme_entry.grid(row=0, column=1, padx=5, pady=5) 24 | 25 | self.synopsis_label = ttk.Label(self, text="Synopsis:") 26 | self.synopsis_label.grid(row=1, column=0, sticky="w") 27 | self.synopsis_text = scrolledtext.ScrolledText(self, width=50, height=5) 28 | self.synopsis_text.grid(row=1, column=1, padx=5, pady=5) 29 | 30 | # Number of Words and Chapters 31 | self.words_label = ttk.Label(self, text="Number of Words:") 32 | self.words_label.grid(row=2, column=0, sticky="w") 33 | self.words_entry = ttk.Entry(self) 34 | self.words_entry.grid(row=2, column=1, padx=5, pady=5) 35 | 36 | self.chapters_label = ttk.Label(self, text="Number of Chapters:") 37 | self.chapters_label.grid(row=3, column=0, sticky="w") 38 | self.chapters_entry = ttk.Entry(self) 39 | self.chapters_entry.grid(row=3, column=1, padx=5, pady=5) 40 | 41 | # Generate Button 42 | self.generate_button = ttk.Button(self, text="Generate Novel", command=self.generate_novel) 43 | self.generate_button.grid(row=4, column=0, columnspan=2, pady=10) 44 | 45 | # Generated Novel Display 46 | self.generated_novel_label = ttk.Label(self, text="Generated Novel:") 47 | self.generated_novel_label.grid(row=5, column=0, sticky="w") 48 | self.generated_novel_text = scrolledtext.ScrolledText(self, width=80, height=20) 49 | self.generated_novel_text.grid(row=6, column=0, columnspan=2, padx=5, pady=5) 50 | 51 | def generate_novel(self): 52 | # Get user input 53 | theme = self.theme_entry.get() 54 | synopsis = self.synopsis_text.get("1.0", tk.END) 55 | num_words = int(self.words_entry.get()) 56 | num_chapters = int(self.chapters_entry.get()) 57 | 58 | # Generate novel content using ChatGPT 59 | novel_content = self.generate_novel_content(theme, synopsis, num_words, num_chapters) 60 | 61 | # Display generated novel 62 | self.generated_novel_text.delete("1.0", tk.END) 63 | 
--------------------------------------------------------------------------------
/sample_text.txt:
--------------------------------------------------------------------------------
waht are
--------------------------------------------------------------------------------
/start.py:
--------------------------------------------------------------------------------
import tkinter as tk
from tkinter import filedialog
from tkinter import scrolledtext

import torch
from transformers import BertForQuestionAnswering, BertTokenizer


class LLMApp(tk.Tk):
    def __init__(self):
        super().__init__()
        self.title("LLM Question Answering App")
        self.geometry("600x400")

        self.create_widgets()

    def create_widgets(self):
        self.question_label = tk.Label(self, text="Enter your question:")
        self.question_label.pack()

        self.question_entry = tk.Entry(self, width=50)
        self.question_entry.pack()

        self.load_file_button = tk.Button(self, text="Load File", command=self.load_file)
        self.load_file_button.pack()

        self.text_display = scrolledtext.ScrolledText(self, width=60, height=15)
        self.text_display.pack()

        self.answer_label = tk.Label(self, text="Answer:")
        self.answer_label.pack()

        self.answer_display = tk.Label(self, text="")
        self.answer_display.pack()

        self.answer_button = tk.Button(self, text="Get Answer", command=self.get_answer)
        self.answer_button.pack()

    def load_file(self):
        file_path = filedialog.askopenfilename()
        if file_path:
            with open(file_path, "r", encoding="utf-8") as file:
                text = file.read()
            self.text_display.delete("1.0", tk.END)
            self.text_display.insert(tk.END, text)

    def get_answer(self):
        question = self.question_entry.get()
        if not question:
            self.answer_display.config(text="Please enter a question.")
            return

        file_text = self.text_display.get("1.0", tk.END)
        if not file_text.strip():
            self.answer_display.config(text="Please load a file.")
            return

        answer = self.answer_question_from_text(file_text, question)
        self.answer_display.config(text=answer)

    def answer_question_from_text(self, text, question):
        # Specify the directory where you extracted the model files
        model_directory = "/path/to/your/model/files"

        # Load the model and tokenizer from the manually downloaded files
        model = BertForQuestionAnswering.from_pretrained(model_directory)
        tokenizer = BertTokenizer.from_pretrained(model_directory)

        inputs = tokenizer(question, text, return_tensors="pt", max_length=512, truncation=True)

        # Recent transformers versions return a ModelOutput object rather than
        # a (start_scores, end_scores) tuple, so read the logits by name.
        with torch.no_grad():
            outputs = model(**inputs)
        start_scores = outputs.start_logits
        end_scores = outputs.end_logits

        all_tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
        answer_tokens = all_tokens[torch.argmax(start_scores) : torch.argmax(end_scores) + 1]
        answer = tokenizer.convert_tokens_to_string(answer_tokens)

        return answer


if __name__ == "__main__":
    app = LLMApp()
    app.mainloop()
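start.py drives BERT by hand; the transformers `pipeline` helper wraps the same tokenize, forward-pass, and span-decoding steps in a single call. A minimal sketch, assuming a model directory laid out like the script's own placeholder path and the repository's `sample_text.txt` as context:

```python
from transformers import pipeline

# Same local directory as in start.py; the placeholder path stands in
# for wherever the BERT QA weights actually live.
model_directory = "/path/to/your/model/files"
qa = pipeline("question-answering", model=model_directory, tokenizer=model_directory)

with open("sample_text.txt", encoding="utf-8") as f:
    context = f.read()

result = qa(question="What is this text about?", context=context)
print(result["answer"], round(result["score"], 3))
```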
--------------------------------------------------------------------------------