├── .gitignore
├── LICENSE
├── Note
    ├── DL
    │   ├── dl
    │   │   ├── segment_data.py
    │   │   └── test.py
    │   ├── kernel.py
    │   └── parallel
    │   │   ├── kernel.py
    │   │   └── kernel_pytorch.py
    ├── RL
    │   ├── kernel.py
    │   ├── parallel
    │   │   ├── kernel.py
    │   │   └── kernel_pytorch.py
    │   └── rl
    │   │   ├── __init__.py
    │   │   ├── animate_agent.py
    │   │   ├── noise.py
    │   │   ├── policy.py
    │   │   └── prioritized_replay.py
    ├── models
    │   ├── docs_example
    │   │   ├── DL
    │   │   │   ├── model1.py
    │   │   │   ├── model2.py
    │   │   │   ├── model3.py
    │   │   │   ├── model4.py
    │   │   │   └── model5.py
    │   │   └── RL
    │   │   │   ├── keras
    │   │   │       ├── DDPG.py
    │   │   │       ├── DDPG_HER.py
    │   │   │       ├── DQN.py
    │   │   │       ├── DQN_PR.py
    │   │   │       ├── MADDPG.py
    │   │   │       ├── PPO.py
    │   │   │       └── pool_network
    │   │   │       │   ├── DQN.py
    │   │   │       │   └── DQN_PR.py
    │   │   │   ├── note
    │   │   │       ├── DDPG.py
    │   │   │       ├── DDPG_HER.py
    │   │   │       ├── DQN.py
    │   │   │       ├── DQN_IRL.py
    │   │   │       ├── DQN_PR.py
    │   │   │       ├── MADDPG.py
    │   │   │       ├── PPO.py
    │   │   │       ├── Rainbow.py
    │   │   │       └── pool_network
    │   │   │       │   ├── DDPG_HER.py
    │   │   │       │   ├── DQN.py
    │   │   │       │   ├── DQN_PR.py
    │   │   │       │   ├── MADDPG.py
    │   │   │       │   └── Rainbow.py
    │   │   │   └── pytorch
    │   │   │       ├── DDPG.py
    │   │   │       ├── DDPG_HER.py
    │   │   │       ├── DQN.py
    │   │   │       ├── DoubleDQN.py
    │   │   │       ├── DuelingDQN.py
    │   │   │       ├── MADDPG.py
    │   │   │       └── pool_network
    │   │   │           ├── DDPG.py
    │   │   │           ├── DDPG_HER.py
    │   │   │           ├── DQN.py
    │   │   │           ├── DoubleDQN.py
    │   │   │           ├── DuelingDQN.py
    │   │   │           └── MADDPG.py
    │   ├── note
    │   │   ├── BertModel.py
    │   │   ├── BiT.py
    │   │   ├── CLIP.py
    │   │   ├── ConvNeXt.py
    │   │   ├── ConvNeXtV2.py
    │   │   ├── DenseNet
    │   │   │   ├── DenseNet121.py
    │   │   │   ├── DenseNet169.py
    │   │   │   └── DenseNet201.py
    │   │   ├── EfficientNet.py
    │   │   ├── EfficientNetV2.py
    │   │   ├── Gemma.py
    │   │   ├── Inception
    │   │   │   ├── InceptionResNetV2.py
    │   │   │   └── InceptionV3.py
    │   │   ├── Llama2.py
    │   │   ├── MobileNet.py
    │   │   ├── MobileNetV2.py
    │   │   ├── MobileNetV3.py
    │   │   ├── RegNet.py
    │   │   ├── ResNet
    │   │   │   ├── ResNet101.py
    │   │   │   ├── ResNet152.py
    │   │   │   └── ResNet50.py
    │   │   ├── ResNetRS.py
    │   │   ├── Segformer.py
    │   │   ├── SwiftFormer.py
    │   │   ├── VGG16.py
    │   │   ├── VGG19.py
    │   │   ├── ViT.py
    │   │   ├── Whisper.py
    │   │   └── non_parallel
    │   │   │   ├── BertModel.py
    │   │   │   ├── BiT.py
    │   │   │   ├── CLIP.py
    │   │   │   ├── ConvNeXt.py
    │   │   │   ├── ConvNeXtV2.py
    │   │   │   ├── DenseNet
    │   │   │       ├── DenseNet121.py
    │   │   │       ├── DenseNet169.py
    │   │   │       └── DenseNet201.py
    │   │   │   ├── EfficientNet.py
    │   │   │   ├── EfficientNetV2.py
    │   │   │   ├── GPT2.py
    │   │   │   ├── Gemma.py
    │   │   │   ├── Llama2.py
    │   │   │   ├── MobileNet.py
    │   │   │   ├── MobileNetV2.py
    │   │   │   ├── MobileNetV3.py
    │   │   │   ├── RegNet.py
    │   │   │   ├── ResNet
    │   │   │       ├── ResNet101.py
    │   │   │       ├── ResNet152.py
    │   │   │       └── ResNet50.py
    │   │   │   ├── ResNetRS.py
    │   │   │   ├── Segformer.py
    │   │   │   ├── SwiftFormer.py
    │   │   │   ├── VGG19.py
    │   │   │   ├── ViT.py
    │   │   │   └── Whisper.py
    │   └── tf
    │   │   ├── BEiT.py
    │   │   ├── BEiT2.py
    │   │   ├── BertModel.py
    │   │   ├── BiT.py
    │   │   ├── CCT.py
    │   │   ├── CLIP.py
    │   │   ├── CaiT.py
    │   │   ├── ConViT.py
    │   │   ├── ConvNeXt.py
    │   │   ├── ConvNeXtV2.py
    │   │   ├── CrossViT.py
    │   │   ├── CvT.py
    │   │   ├── DaViT.py
    │   │   ├── DeepViT.py
    │   │   ├── DeiT.py
    │   │   ├── DenseNet
    │   │       ├── DenseNet121.py
    │   │       ├── DenseNet169.py
    │   │       └── DenseNet201.py
    │   │   ├── DiT.py
    │   │   ├── EfficientNet.py
    │   │   ├── EfficientNetV2.py
    │   │   ├── EfficientVit_mit.py
    │   │   ├── EfficientVit_msra.py
    │   │   ├── GCViT.py
    │   │   ├── GCViT_detection.py
    │   │   ├── Gemma.py
    │   │   ├── Llama.py
    │   │   ├── Llama2.py
    │   │   ├── Llama3.py
    │   │   ├── MiT.py
    │   │   ├── Mixtral.py
    │   │   ├── MobileNet.py
    │   │   ├── MobileNetV2.py
    │   │   ├── MobileNetV3.py
    │   │   ├── PVT.py
    │   │   ├── Phi2.py
    │   │   ├── Phi3.py
    │   │   ├── PiT.py
    │   │   ├── RDNet.py
    │   │   ├── RegNet.py
    │   │   ├── ResNet
    │   │       ├── ResNet101.py
    │   │       ├── ResNet152.py
    │   │       └── ResNet50.py
    │   │   ├── ResNetRS.py
    │   │   ├── Segformer.py
    │   │   ├── SwiftFormer.py
    │   │   ├── SwinMLP.py
    │   │   ├── SwinTransformerV2.py
    │   │   ├── VGG19.py
    │   │   ├── ViT.py
    │   │   ├── ViViT.py
    │   │   ├── Whisper.py
    │   │   └── XCiT.py
    ├── nn
    │   ├── Model.py
    │   ├── RL.py
    │   ├── RL_pytorch.py
    │   ├── Sequential.py
    │   ├── __init__.py
    │   ├── accuracy.py
    │   ├── activation.py
    │   ├── assign_param.py
    │   ├── coalesce_sparse.py
    │   ├── conv2d_func.py
    │   ├── cosine_similarity.py
    │   ├── create_additive_causal_mask.py
    │   ├── gather_mm.py
    │   ├── helpers.py
    │   ├── init.py
    │   ├── initializer.py
    │   ├── interpolate.py
    │   ├── lambda_callback.py
    │   ├── layer
    │   │   ├── BiRNN.py
    │   │   ├── BigBird_attention.py
    │   │   ├── BigBird_masks.py
    │   │   ├── ConvRNN.py
    │   │   ├── FAVOR_attention.py
    │   │   ├── GCN.py
    │   │   ├── GRU.py
    │   │   ├── GRUCell.py
    │   │   ├── LSTM.py
    │   │   ├── LSTMCell.py
    │   │   ├── Linformer_self_attention.py
    │   │   ├── LoRALinear.py
    │   │   ├── MoE_layer.py
    │   │   ├── PReLU.py
    │   │   ├── RMSNorm.py
    │   │   ├── RNN.py
    │   │   ├── RNNCell.py
    │   │   ├── RoPE.py
    │   │   ├── SwitchGLU.py
    │   │   ├── TLU.py
    │   │   ├── Transformer.py
    │   │   ├── TransformerDecoder.py
    │   │   ├── TransformerDecoderLayer.py
    │   │   ├── TransformerEncoder.py
    │   │   ├── TransformerEncoderLayer.py
    │   │   ├── adaptive_avg_pooling1d.py
    │   │   ├── adaptive_avg_pooling2d.py
    │   │   ├── adaptive_avg_pooling3d.py
    │   │   ├── adaptive_avgmax_pool.py
    │   │   ├── adaptive_max_pooling1d.py
    │   │   ├── adaptive_max_pooling2d.py
    │   │   ├── adaptive_max_pooling3d.py
    │   │   ├── add.py
    │   │   ├── additive_attention.py
    │   │   ├── alpha_dropout.py
    │   │   ├── attention.py
    │   │   ├── attention2d.py
    │   │   ├── attention_pool.py
    │   │   ├── attention_pool2d.py
    │   │   ├── average.py
    │   │   ├── avg_pool1d.py
    │   │   ├── avg_pool2d.py
    │   │   ├── avg_pool3d.py
    │   │   ├── axial_positional_encoding.py
    │   │   ├── batch_norm.py
    │   │   ├── bilinear.py
    │   │   ├── blur_pool.py
    │   │   ├── bottleneck_attn.py
    │   │   ├── cached_attention.py
    │   │   ├── capsule.py
    │   │   ├── cbam.py
    │   │   ├── classifier.py
    │   │   ├── concat.py
    │   │   ├── conv1d.py
    │   │   ├── conv1d_transpose.py
    │   │   ├── conv2d.py
    │   │   ├── conv2d_transpose.py
    │   │   ├── conv3d.py
    │   │   ├── conv3d_transpose.py
    │   │   ├── conv_bn_act.py
    │   │   ├── cropping1d.py
    │   │   ├── cropping2d.py
    │   │   ├── cropping3d.py
    │   │   ├── dense.py
    │   │   ├── depthwise_conv1d.py
    │   │   ├── depthwise_conv2d.py
    │   │   ├── dropout.py
    │   │   ├── dynamic_tanh.py
    │   │   ├── eca.py
    │   │   ├── einsum_dense.py
    │   │   ├── embedding.py
    │   │   ├── feed_forward_experts.py
    │   │   ├── filter_response_norm.py
    │   │   ├── flatten.py
    │   │   ├── format.py
    │   │   ├── gather_excite.py
    │   │   ├── gaussian_dropout.py
    │   │   ├── gaussian_noise.py
    │   │   ├── global_avg_pool1d.py
    │   │   ├── global_avg_pool2d.py
    │   │   ├── global_avg_pool3d.py
    │   │   ├── global_context.py
    │   │   ├── global_max_pool1d.py
    │   │   ├── global_max_pool2d.py
    │   │   ├── global_max_pool3d.py
    │   │   ├── grn.py
    │   │   ├── group_norm.py
    │   │   ├── grouped_query_attention.py
    │   │   ├── halo_attn.py
    │   │   ├── identity.py
    │   │   ├── image_preprocessing
    │   │   │   ├── center_crop.py
    │   │   │   ├── random_brightness.py
    │   │   │   ├── random_crop.py
    │   │   │   ├── random_height.py
    │   │   │   ├── random_rotation.py
    │   │   │   ├── random_translation.py
    │   │   │   ├── random_width.py
    │   │   │   ├── random_zoom.py
    │   │   │   ├── rescaling.py
    │   │   │   ├── resizing.py
    │   │   │   └── transform.py
    │   │   ├── interpolate.py
    │   │   ├── kernel_attention.py
    │   │   ├── lambda_layer.py
    │   │   ├── layer_norm.py
    │   │   ├── layer_scale.py
    │   │   ├── llama.py
    │   │   ├── lp_pool1d.py
    │   │   ├── lp_pool2d.py
    │   │   ├── lp_pool3d.py
    │   │   ├── masked_lm.py
    │   │   ├── masked_softmax.py
    │   │   ├── masking.py
    │   │   ├── matmul_with_margin.py
    │   │   ├── max_pool1d.py
    │   │   ├── max_pool2d.py
    │   │   ├── max_pool3d.py
    │   │   ├── maximum.py
    │   │   ├── maxout.py
    │   │   ├── minimum.py
    │   │   ├── ml_decoder.py
    │   │   ├── mlp.py
    │   │   ├── multi_cls_heads.py
    │   │   ├── multichannel_attention.py
    │   │   ├── multihead_attention.py
    │   │   ├── multiheadrelative_attention.py
    │   │   ├── multiply.py
    │   │   ├── non_local_attn.py
    │   │   ├── norm.py
    │   │   ├── patch_dropout.py
    │   │   ├── perdimscale_attention.py
    │   │   ├── permute.py
    │   │   ├── pos_embed.py
    │   │   ├── pos_embed_sincos.py
    │   │   ├── position_embedding.py
    │   │   ├── repeat_vector.py
    │   │   ├── reshape.py
    │   │   ├── reuse_multihead_attention.py
    │   │   ├── reversible_residual.py
    │   │   ├── router.py
    │   │   ├── select_topk.py
    │   │   ├── selective_kernel.py
    │   │   ├── self_attention_mask.py
    │   │   ├── separable_conv1d.py
    │   │   ├── separable_conv2d.py
    │   │   ├── softmax.py
    │   │   ├── space_to_depth.py
    │   │   ├── spatial_dropout1d.py
    │   │   ├── spatial_dropout2d.py
    │   │   ├── spatial_dropout3d.py
    │   │   ├── spectral_norm.py
    │   │   ├── split_attn.py
    │   │   ├── squeeze_excite.py
    │   │   ├── stochastic_depth.py
    │   │   ├── subtract.py
    │   │   ├── talking_heads_attention.py
    │   │   ├── thresholded_relu.py
    │   │   ├── two_stream_relative_attention.py
    │   │   ├── unfold.py
    │   │   ├── unit_norm.py
    │   │   ├── up_sampling1d.py
    │   │   ├── up_sampling2d.py
    │   │   ├── up_sampling3d.py
    │   │   ├── vector_quantizer.py
    │   │   ├── vision_transformer.py
    │   │   ├── voting_attention.py
    │   │   ├── zeropadding1d.py
    │   │   ├── zeropadding2d.py
    │   │   └── zeropadding3d.py
    │   ├── lr_finder.py
    │   ├── nan_to_num.py
    │   ├── narrow.py
    │   ├── opt_finder.py
    │   ├── optimizer
    │   │   ├── a2grad.py
    │   │   ├── accsgd.py
    │   │   ├── adabelief.py
    │   │   ├── adabound.py
    │   │   ├── adaboundw.py
    │   │   ├── adafactor_bv.py
    │   │   ├── adagc.py
    │   │   ├── adahessian.py
    │   │   ├── adai.py
    │   │   ├── adaiv2.py
    │   │   ├── adalite.py
    │   │   ├── adam_mini.py
    │   │   ├── adamax.py
    │   │   ├── adamg.py
    │   │   ├── adamod.py
    │   │   ├── adamp.py
    │   │   ├── adan.py
    │   │   ├── adanorm.py
    │   │   ├── adapnm.py
    │   │   ├── adashift.py
    │   │   ├── adasmooth.py
    │   │   ├── ademamix.py
    │   │   ├── adopt.py
    │   │   ├── aggmo.py
    │   │   ├── aida.py
    │   │   ├── alig.py
    │   │   ├── amos.py
    │   │   ├── apollo.py
    │   │   ├── asgd.py
    │   │   ├── avagrad.py
    │   │   ├── base_optimizer.py
    │   │   ├── came.py
    │   │   ├── dadaptadagrad.py
    │   │   ├── dadaptadam.py
    │   │   ├── dadaptadan.py
    │   │   ├── dadaptlion.py
    │   │   ├── dadaptsgd.py
    │   │   ├── diffgrad.py
    │   │   ├── exadam.py
    │   │   ├── fadam.py
    │   │   ├── fira.py
    │   │   ├── focus.py
    │   │   ├── fromage.py
    │   │   ├── galore.py
    │   │   ├── galore_projector.py
    │   │   ├── grams.py
    │   │   ├── gravity.py
    │   │   ├── grokfast.py
    │   │   ├── kate.py
    │   │   ├── kron.py
    │   │   ├── lamb.py
    │   │   ├── laprop.py
    │   │   ├── lars.py
    │   │   ├── lomo.py
    │   │   ├── lookahead.py
    │   │   ├── madgrad.py
    │   │   ├── mars.py
    │   │   ├── msvag.py
    │   │   ├── muon.py
    │   │   ├── nadam.py
    │   │   ├── nadamw.py
    │   │   ├── nero.py
    │   │   ├── nvnovograd.py
    │   │   ├── optimizer.py
    │   │   ├── orthograd.py
    │   │   ├── padam.py
    │   │   ├── parallel
    │   │   │   ├── adabelief.py
    │   │   │   ├── adabound.py
    │   │   │   ├── adaboundw.py
    │   │   │   ├── adalite.py
    │   │   │   ├── adamod.py
    │   │   │   ├── adamp.py
    │   │   │   └── radam.py
    │   │   ├── pcgrad.py
    │   │   ├── pid.py
    │   │   ├── pnm.py
    │   │   ├── prodigy.py
    │   │   ├── qhadam.py
    │   │   ├── qhm.py
    │   │   ├── racs.py
    │   │   ├── radam.py
    │   │   ├── ranger.py
    │   │   ├── ranger2020.py
    │   │   ├── ranger21.py
    │   │   ├── ranger25.py
    │   │   ├── rangerqh.py
    │   │   ├── rangerva.py
    │   │   ├── sam.py
    │   │   ├── scion.py
    │   │   ├── sgdp.py
    │   │   ├── sgdw.py
    │   │   ├── shampoo.py
    │   │   ├── signsgd.py
    │   │   ├── sm3.py
    │   │   ├── soap.py
    │   │   ├── sophia.py
    │   │   ├── spam.py
    │   │   ├── srmm.py
    │   │   ├── swats.py
    │   │   ├── tam.py
    │   │   ├── tiger.py
    │   │   ├── trac.py
    │   │   └── yogi.py
    │   ├── pairwise_distance.py
    │   ├── parallel
    │   │   ├── assign_device.py
    │   │   ├── assign_device_pytorch.py
    │   │   └── optimizer.py
    │   ├── parallel_finder.py
    │   ├── parallel_finder_rl.py
    │   ├── parameter.py
    │   ├── pos_embed.py
    │   ├── positional_encoding.py
    │   ├── restore.py
    │   ├── scaled_dot_product_attention.py
    │   ├── softplus.py
    │   ├── solve_triangular.py
    │   └── sparse_mask.py
    ├── sr.py
    └── version.py
└── README.md


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 


--------------------------------------------------------------------------------
/Note/DL/dl/segment_data.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | 
def segment_data(data,labels,process):
    """Split data and labels into ``process`` segments.

    When ``len(data)`` differs from ``process``, both ``data`` and ``labels``
    are split with ``np.array_split`` into ``process`` pieces. When
    ``len(data)`` already equals ``process`` the inputs are returned
    unchanged (presumably they are already segmented -- verify against the
    callers).

    Args:
        data: Sequence or array of samples.
        labels: Sequence or array of labels, split the same way as ``data``.
        process: Number of segments to produce.

    Returns:
        A ``(data, labels)`` tuple. After a split, each element is a list of
        ``process`` arrays.
    """
    if len(data)!=process:
        data=np.array_split(data,process)
        labels=np.array_split(labels,process)
    # Always return the pair; previously the return sat inside the branch
    # above, so the function returned None when no split was needed.
    return data,labels
9 | 


--------------------------------------------------------------------------------
/Note/RL/rl/__init__.py:
--------------------------------------------------------------------------------
1 | from Note.RL.rl.policy import *
2 | from Note.RL.rl.noise import *
3 | 


--------------------------------------------------------------------------------
/Note/RL/rl/animate_agent.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | import matplotlib.animation as animation
 4 | 
 5 | 
 6 | class animate_agent:
 7 |     def __init__(self,agent,env,platform='tf'):
 8 |         self.agent=agent
 9 |         self.env=env
10 |         self.platform=platform
11 |     
12 |     
13 |     def run_agent(self, max_steps, seed=None):
14 |         state_history = []
15 | 
16 |         steps = 0
17 |         reward_ = 0
18 |         if seed==None:
19 |             state = self.nn.genv.reset()
20 |         else:
21 |             state = self.nn.genv.reset(seed=seed)
22 |         for step in range(max_steps):
23 |             if self.platform=='tf':
24 |                 if not hasattr(self, 'noise'):
25 |                     action = np.argmax(self.agent.nn(state))
26 |                 else:
27 |                     action = self.agent.actor(state).numpy()
28 |             elif self.platform=='pytorch':
29 |                 if not hasattr(self, 'noise'):
30 |                     action = np.argmax(self.agent.nn(state))
31 |                 else:
32 |                     action = self.agent.actor(state).detach().numpy()
33 |             next_state, reward, done, _ = self.env.step(action)
34 |             state_history.append(state)
35 |             steps+=1
36 |             reward_+=reward
37 |             if done:
38 |                 break
39 |             state = next_state
40 |         
41 |         return state_history,reward_,steps
42 |     
43 |     
44 |     def __call__(self, max_steps, mode='rgb_array', save_path=None, fps=None, writer='imagemagick'):
45 |         state_history,reward,steps = self.run_agent(max_steps)
46 |         
47 |         fig = plt.figure()
48 |         ax = fig.add_subplot()
49 |         self.env.reset()
50 |         img = ax.imshow(self.env.render(mode=mode))
51 | 
52 |         def update(frame):
53 |             img.set_array(self.env.render(mode=mode))
54 |             return [img]
55 | 
56 |         ani = animation.FuncAnimation(fig, update, frames=state_history, blit=True)
57 |         plt.show()
58 |         
59 |         print('steps:{0}'.format(steps))
60 |         print('reward:{0}'.format(reward))
61 |         
62 |         if save_path!=None:
63 |             ani.save(save_path, writer=writer, fps=fps)
64 |         return
65 | 


--------------------------------------------------------------------------------
/Note/RL/rl/noise.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | class RandomProcess(object):
 5 |     def reset_states(self):
 6 |         pass
 7 | 
 8 | 
 9 | class AnnealedGaussianProcess(RandomProcess):
10 |     def __init__(self, mu, sigma, sigma_min, n_steps_annealing):
11 |         self.mu = mu
12 |         self.sigma = sigma
13 |         self.n_steps = 0
14 | 
15 |         if sigma_min is not None:
16 |             self.m = -float(sigma - sigma_min) / float(n_steps_annealing)
17 |             self.c = sigma
18 |             self.sigma_min = sigma_min
19 |         else:
20 |             self.m = 0.
21 |             self.c = sigma
22 |             self.sigma_min = sigma
23 | 
24 |     @property
25 |     def current_sigma(self):
26 |         sigma = max(self.sigma_min, self.m * float(self.n_steps) + self.c)
27 |         return sigma
28 | 
29 | 
class GaussianWhiteNoiseProcess(AnnealedGaussianProcess):
    """Gaussian noise drawn independently on every call to ``sample``."""

    def __init__(self, mu=0., sigma=1., sigma_min=None, n_steps_annealing=1000, size=1):
        super().__init__(
            mu=mu,
            sigma=sigma,
            sigma_min=sigma_min,
            n_steps_annealing=n_steps_annealing,
        )
        self.size = size

    def sample(self):
        """Draw ``size`` normal samples with the current sigma and count the step."""
        noise = np.random.normal(self.mu, self.current_sigma, self.size)
        self.n_steps += 1
        return noise
39 | 
# Based on http://math.stackexchange.com/questions/1287634/implementing-ornstein-uhlenbeck-in-matlab
class OrnsteinUhlenbeckProcess(AnnealedGaussianProcess):
    """Ornstein-Uhlenbeck noise built on the annealed-sigma base class.

    Each ``sample`` moves the previous value towards ``mu`` at rate ``theta``
    and adds Gaussian noise scaled by the current sigma and ``sqrt(dt)``.
    """

    def __init__(self, theta, mu=0., sigma=1., dt=1e-2, size=1, sigma_min=None, n_steps_annealing=1000):
        # The base initializer stores mu, sigma and the annealing schedule.
        super().__init__(
            mu=mu,
            sigma=sigma,
            sigma_min=sigma_min,
            n_steps_annealing=n_steps_annealing,
        )
        self.theta = theta
        self.dt = dt
        self.size = size
        self.reset_states()

    def sample(self):
        """Advance the process by one step and return the new value."""
        previous = self.x_prev
        drift = self.theta * (self.mu - previous) * self.dt
        diffusion = self.current_sigma * np.sqrt(self.dt) * np.random.normal(size=self.size)
        current = previous + drift + diffusion
        self.x_prev = current
        self.n_steps += 1
        return current

    def reset_states(self):
        """Re-draw the starting value from a normal with the current sigma."""
        self.x_prev = np.random.normal(self.mu, self.current_sigma, self.size)


--------------------------------------------------------------------------------
/Note/models/docs_example/DL/model1.py:
--------------------------------------------------------------------------------
 1 | from Note import nn
 2 | 
 3 | class Model(nn.Model):
 4 |   def __init__(self):
 5 |     super().__init__()
 6 |     self.conv1 = nn.conv2d(32, 3, activation='relu')
 7 |     self.flatten = nn.flatten()
 8 |     self.d1 = nn.dense(128, activation='relu')
 9 |     self.d2 = nn.dense(10)
10 | 
11 |   def __call__(self, x):
12 |     x = self.conv1(x)
13 |     x = self.flatten(x)
14 |     x = self.d1(x)
15 |     return self.d2(x)


--------------------------------------------------------------------------------
/Note/models/docs_example/DL/model2.py:
--------------------------------------------------------------------------------
 1 | from Note import nn
 2 | 
 3 | class Model(nn.Model):
 4 |   def __init__(self):
 5 |     super().__init__()
 6 |     self.layers = nn.Sequential()
 7 |     self.layers.add(nn.conv2d(32, 3, activation='relu'))
 8 |     self.layers.add(nn.max_pool2d())
 9 |     self.layers.add(nn.conv2d(64, 3, activation='relu'))
10 |     self.layers.add(nn.max_pool2d())
11 |     self.layers.add(nn.flatten())
12 |     self.layers.add(nn.dense(64, activation='relu'))
13 |     self.layers.add(nn.dense(10))
14 | 
15 |   def __call__(self, x):
16 |     return self.layers(x)


--------------------------------------------------------------------------------
/Note/models/docs_example/DL/model3.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This example demonstrates how to use Note's Adahessian optimizer 
 3 | by modifying the train_step function inherited from the Model class.
 4 | """
 5 | import tensorflow as tf
 6 | from Note import nn
 7 | 
 8 | class Model(nn.Model):
 9 |     def __init__(self):
10 |         super().__init__()
11 |         self.layers = nn.Sequential()
12 |         self.layers.add(nn.conv2d(32, 3, activation='relu'))
13 |         self.layers.add(nn.max_pool2d())
14 |         self.layers.add(nn.conv2d(64, 3, activation='relu'))
15 |         self.layers.add(nn.max_pool2d())
16 |         self.layers.add(nn.flatten())
17 |         self.layers.add(nn.dense(64, activation='relu'))
18 |         self.layers.add(nn.dense(10))
19 |     
20 |     def __call__(self, x):
21 |         return self.layers(x)
22 | 
23 |     @tf.function(jit_compile=True)
24 |     def train_step(self, train_data, labels, loss_object, train_loss, train_accuracy, optimizer):
25 |         with tf.GradientTape() as tape:
26 |             output = self.__call__(train_data)
27 |             loss = loss_object(labels, output)
28 |         gradients = tape.gradient(loss, self.param)
29 |         optimizer.apply_gradients(zip(gradients, self.param), tape)
30 |         train_loss(loss)
31 |         if train_accuracy!=None:
32 |             acc=train_accuracy(labels, output)
33 |             return loss,acc
34 |         return loss,None


--------------------------------------------------------------------------------
/Note/models/docs_example/DL/model4.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This example demonstrates how to use Note's PCGrad class 
 3 | by modifying the train_step function inherited from the Model class.
 4 | """
 5 | import tensorflow as tf
 6 | from Note import nn
 7 | from Note.nn.optimizer.pcgrad import PCGrad
 8 | # from Note.nn.optimizer.pcgrad import PPCGrad
 9 | 
class Model(nn.Model):
    """Example model whose train_step obtains gradients through PCGrad."""

    def __init__(self):
        super().__init__()
        self.layers = nn.Sequential()
        add = self.layers.add
        # Two conv/pool stages followed by a dense head.
        add(nn.conv2d(32, 3, activation='relu'))
        add(nn.max_pool2d())
        add(nn.conv2d(64, 3, activation='relu'))
        add(nn.max_pool2d())
        add(nn.flatten())
        add(nn.dense(64, activation='relu'))
        add(nn.dense(10))
        # Alternative: PPCGrad() (see the commented import at the top of the file).
        self.pcgrad = PCGrad()

    def __call__(self, x):
        """Pass ``x`` through the sequential stack."""
        out = self.layers(x)
        return out

    @tf.function(jit_compile=True)
    def train_step(self, train_data, labels, loss_object, train_loss, train_accuracy, optimizer):
        """Run one optimization step using ``self.pcgrad.pc_backward``.

        Returns ``(loss, acc)`` where ``loss`` is the value returned by
        ``train_loss``; ``acc`` is None when ``train_accuracy`` is None.
        """
        # The tape is persistent because it is reused after the with-block.
        with tf.GradientTape(persistent=True) as tape:
            output = self.__call__(train_data)
            losses = loss_object(labels, output)
        grads = self.pcgrad.pc_backward(tape, losses, self.param)
        optimizer.apply_gradients(zip(grads, self.param), tape)
        loss = train_loss(losses)
        acc = train_accuracy(labels, output) if train_accuracy!=None else None
        return loss,acc


--------------------------------------------------------------------------------
/Note/models/docs_example/DL/model5.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This example demonstrates how to use Note's LOMO class or AdaLOMO class
 3 | by modifying the train_step function inherited from the Model class.
 4 | """
 5 | import tensorflow as tf
 6 | from Note import nn
 7 | 
 8 | class Model(nn.Model):
 9 |     def __init__(self):
10 |         super().__init__()
11 |         self.layers = nn.Sequential()
12 |         self.layers.add(nn.conv2d(32, 3, activation='relu'))
13 |         self.layers.add(nn.max_pool2d())
14 |         self.layers.add(nn.conv2d(64, 3, activation='relu'))
15 |         self.layers.add(nn.max_pool2d())
16 |         self.layers.add(nn.flatten())
17 |         self.layers.add(nn.dense(64, activation='relu'))
18 |         self.layers.add(nn.dense(10))
19 |     
20 |     def __call__(self, x):
21 |         return self.layers(x)
22 | 
23 |     @tf.function(jit_compile=True)
24 |     def train_step(self, train_data, labels, loss_object, train_loss, train_accuracy, optimizer):
25 |         with tf.GradientTape(persistent=True) as tape:
26 |             output = self.__call__(train_data)
27 |             loss = loss_object(labels, output)
28 |         if optimizer.clip_grad_norm is not None and optimizer.clip_grad_norm > 0.0:
29 |             optimizer.grad_norm(tape, loss, self.param)
30 |         optimizer.fused_backward(tape, loss, self.param, lr=optimizer.lr)
31 |         loss = train_loss(loss)
32 |         if train_accuracy!=None:
33 |             acc=train_accuracy(labels, output)
34 |             return loss,acc
35 |         return loss,None


--------------------------------------------------------------------------------
/Note/models/docs_example/RL/keras/DDPG.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note import nn
 3 | from keras.models import Sequential
 4 | from keras import Model
 5 | import gym
 6 | 
 7 | 
 8 | class actor(Model):
 9 |     def __init__(self,state_dim,hidden_dim,action_dim,action_bound):
10 |         super().__init__()
11 |         self.model = Sequential()
12 |         self.model.add(tf.keras.layers.Dense(hidden_dim, input_shape=(state_dim,), activation='relu'))
13 |         self.model.add(tf.keras.layers.Dense(action_dim, activation='tanh'))
14 |         self.action_bound=action_bound
15 |     
16 |     def __call__(self,x):
17 |         x = self.model(x)
18 |         return x*self.action_bound
19 | 
20 | 
class critic(Model):
    """Value network evaluated on the concatenation of a state and an action."""

    def __init__(self,state_dim,hidden_dim,action_dim):
        super().__init__()
        self.model = Sequential()
        dense = tf.keras.layers.Dense
        self.model.add(dense(hidden_dim, input_shape=(state_dim+action_dim,), activation='relu'))
        # NOTE(review): the output width is action_dim -- confirm this is intended.
        self.model.add(dense(action_dim))

    def __call__(self,x,a):
        """Concatenate ``x`` and ``a`` along axis 1 and run the network."""
        state_action=tf.concat([x,a],axis=1)
        return self.model(state_action)
32 | 
33 | 
class DDPG(nn.RL):
    """DDPG example on the gym 'Pendulum-v1' environment.

    Holds an actor/critic pair plus target copies; ``__call__`` returns the
    sum of the actor and critic losses, and ``update_param`` blends the
    online weights into the target weights with factor ``tau``.
    """

    def __init__(self,hidden_dim,sigma,gamma,tau):
        super().__init__()
        self.env=gym.make('Pendulum-v1')
        # Network sizes are read from the environment's spaces.
        state_dim=self.env.observation_space.shape[0]
        action_dim=self.env.action_space.shape[0]
        action_bound=self.env.action_space.high[0]
        actor_args=(state_dim,hidden_dim,action_dim,action_bound)
        critic_args=(state_dim,hidden_dim,action_dim)
        self.actor=actor(*actor_args)
        self.critic=critic(*critic_args)
        self.target_actor=actor(*actor_args)
        self.target_critic=critic(*critic_args)
        # Start the target networks from the online networks' weights.
        nn.assign_param(self.target_actor.weights,self.actor.weights)
        nn.assign_param(self.target_critic.weights,self.critic.weights)
        self.param=[self.actor.weights,self.critic.weights]
        self.sigma=sigma
        self.gamma=gamma
        self.tau=tau

    def action(self,s):
        """Return the actor's output for state ``s``."""
        return self.actor(s)

    def __call__(self,s,a,next_s,r,d):
        """Return ``actor_loss + critic_loss`` for a batch of transitions."""
        a=tf.expand_dims(a,axis=1)
        next_action=self.target_actor(next_s)
        next_q_value=self.target_critic(next_s,next_action)
        reward=tf.cast(r,'float32')
        not_done=1-tf.cast(d,'float32')
        q_target=reward+self.gamma*next_q_value*not_done
        actor_loss=-tf.reduce_mean(self.critic(s,self.actor(s)))
        td_error=self.critic(s,a)-q_target
        critic_loss=tf.reduce_mean(td_error**2)
        return actor_loss+critic_loss

    def update_param(self):
        """Blend online weights into the target weights with factor ``tau``."""
        network_pairs=(
            (self.target_actor,self.actor),
            (self.target_critic,self.critic),
        )
        for target_net,online_net in network_pairs:
            for target_param,param in zip(target_net.weights,online_net.weights):
                target_param.assign(target_param*(1.0-self.tau)+param*self.tau)
        return


--------------------------------------------------------------------------------
/Note/models/docs_example/RL/keras/DQN.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note import nn
 3 | from keras.models import Sequential
 4 | from keras import Model
 5 | import gym
 6 | 
 7 | 
 8 | class Qnet(Model):
 9 |     def __init__(self,state_dim, hidden_dim, action_dim):
10 |         super().__init__()
11 |         self.model = Sequential()
12 |         self.model.add(tf.keras.layers.Dense(hidden_dim, input_shape=(state_dim,), activation='relu'))
13 |         self.model.add(tf.keras.layers.Dense(action_dim))
14 |     
15 |     def __call__(self,x):
16 |         x = self.model(x)
17 |         return x
18 |     
19 |     
class DQN(nn.RL):
    """DQN agent on ``CartPole-v0`` with an online and a target Q-network.

    Args:
        state_dim: size of a state vector.
        hidden_dim: width of the hidden layer.
        action_dim: number of discrete actions.
        gamma: discount factor; the default keeps the former hard-coded 0.98.
    """

    def __init__(self,state_dim,hidden_dim,action_dim,gamma=0.98):
        super().__init__()
        self.q_net=Qnet(state_dim,hidden_dim,action_dim)
        self.target_q_net=Qnet(state_dim,hidden_dim,action_dim)
        self.param=self.q_net.weights
        self.gamma=gamma
        self.env=gym.make('CartPole-v0')

    def action(self,s):
        """Return the online network's Q-values for states ``s``."""
        return self.q_net(s)

    def __call__(self,s,a,next_s,r,d):
        """Return the mean squared one-step TD error of a batch of transitions."""
        a=tf.expand_dims(a,axis=1)
        # The gather yields (batch, 1) while reduce_max yields (batch,);
        # subtracting those directly broadcasts to (batch, batch). Flatten
        # every per-sample term to (batch,) before combining them.
        q_value=tf.reshape(tf.gather(self.q_net(s),a,axis=1,batch_dims=1),[-1])
        next_q_value=tf.reduce_max(self.target_q_net(next_s),axis=1)
        r=tf.reshape(tf.cast(r,'float32'),[-1])
        d=tf.reshape(tf.cast(d,'float32'),[-1])
        target=r+self.gamma*next_q_value*(1-d)
        TD=q_value-target
        return tf.reduce_mean(TD**2)

    def update_param(self):
        """Overwrite the target network's weights with the online network's weights."""
        nn.assign_param(self.target_q_net.weights,self.param)
        return
42 | 


--------------------------------------------------------------------------------
/Note/models/docs_example/RL/keras/DQN_PR.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note import nn
 3 | from keras.models import Sequential
 4 | from keras import Model
 5 | import gym
 6 | 
 7 | 
 8 | class Qnet(Model):
 9 |     def __init__(self,state_dim, hidden_dim, action_dim):
10 |         super().__init__()
11 |         self.model = Sequential()
12 |         self.model.add(tf.keras.layers.Dense(hidden_dim, input_shape=(state_dim,), activation='relu'))
13 |         self.model.add(tf.keras.layers.Dense(action_dim))
14 |     
15 |     def __call__(self,x):
16 |         x = self.model(x)
17 |         return x
18 |     
19 |     
class DQN(nn.RL):
    """DQN agent on ``CartPole-v0`` that reports TD errors to prioritized replay.

    Args:
        state_dim: size of a state vector.
        hidden_dim: width of the hidden layer.
        action_dim: number of discrete actions.
        gamma: discount factor; the default keeps the former hard-coded 0.98.
    """

    def __init__(self,state_dim,hidden_dim,action_dim,gamma=0.98):
        super().__init__()
        self.q_net=Qnet(state_dim,hidden_dim,action_dim)
        self.target_q_net=Qnet(state_dim,hidden_dim,action_dim)
        self.param=self.q_net.weights
        self.gamma=gamma
        self.env=gym.make('CartPole-v0')

    def action(self,s):
        """Return the online network's Q-values for states ``s``."""
        return self.q_net(s)

    def __call__(self,s,a,next_s,r,d):
        """Return the mean squared TD error and push the per-sample TD errors to the replay."""
        a=tf.expand_dims(a,axis=1)
        # The gather yields (batch, 1) while reduce_max yields (batch,);
        # subtracting those directly broadcasts to (batch, batch). Flatten
        # every per-sample term to (batch,) so TD holds one error per sample.
        q_value=tf.reshape(tf.gather(self.q_net(s),a,axis=1,batch_dims=1),[-1])
        next_q_value=tf.reduce_max(self.target_q_net(next_s),axis=1)
        r=tf.reshape(tf.cast(r,'float32'),[-1])
        d=tf.reshape(tf.cast(d,'float32'),[-1])
        target=r+self.gamma*next_q_value*(1-d)
        TD=q_value-target
        # self.prioritized_replay is not set in this class; presumably provided by nn.RL - confirm.
        self.prioritized_replay.update_TD(TD)
        return tf.reduce_mean(TD**2)

    def update_param(self):
        """Overwrite the target network's weights with the online network's weights."""
        nn.assign_param(self.target_q_net.weights,self.param)
        return


--------------------------------------------------------------------------------
/Note/models/docs_example/RL/keras/PPO.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note import nn
 3 | from keras.models import Sequential
 4 | from keras import Model
 5 | import gym
 6 | 
 7 | 
 8 | class actor(Model):
 9 |     def __init__(self,state_dim,hidden_dim,action_dim):
10 |         super().__init__()
11 |         self.model = Sequential()
12 |         self.model.add(tf.keras.layers.Dense(hidden_dim, input_shape=(state_dim,), activation='relu'))
13 |         self.model.add(tf.keras.layers.Dense(action_dim))
14 |     
15 |     def __call__(self,x):
16 |         x=self.model(x)
17 |         return tf.nn.softmax(x)
18 | 
19 | 
class critic(Model):
    """State-value network: one hidden ReLU layer followed by a single linear output."""

    def __init__(self,state_dim,hidden_dim):
        super().__init__()
        hidden_layer=tf.keras.layers.Dense(hidden_dim, input_shape=(state_dim,), activation='relu')
        value_layer=tf.keras.layers.Dense(1)
        self.model = Sequential([hidden_layer, value_layer])

    def __call__(self,x):
        """Return the value estimate for the batch of states ``x``."""
        return self.model(x)
30 |     
31 |     
class PPO(nn.RL):
    """Clipped-surrogate PPO agent on ``CartPole-v0``.

    Args:
        state_dim: size of a state vector.
        hidden_dim: width of the hidden layers.
        action_dim: number of discrete actions.
        clip_eps: the probability ratio is clipped to [1-clip_eps, 1+clip_eps].
        alpha: weight of the entropy bonus.
        gamma: discount factor; the default keeps the former hard-coded 0.98.
    """

    def __init__(self,state_dim,hidden_dim,action_dim,clip_eps,alpha,gamma=0.98):
        super().__init__()
        self.actor=actor(state_dim,hidden_dim,action_dim)
        self.actor_old=actor(state_dim,hidden_dim,action_dim)
        nn.assign_param(self.actor_old.weights,self.actor.weights)
        self.critic=critic(state_dim,hidden_dim)
        self.clip_eps=clip_eps
        self.alpha=alpha
        self.gamma=gamma
        self.param=[self.actor.weights,self.critic.weights]
        self.env=gym.make('CartPole-v0')

    def action(self,s):
        """Return the behaviour policy's (``actor_old``) action probabilities for ``s``."""
        return self.actor_old(s)

    def __call__(self,s,a,next_s,r,d):
        """Return clipped policy loss (with entropy bonus) plus squared TD value loss."""
        a=tf.expand_dims(a,axis=1)
        action_prob=tf.gather(self.actor(s),a,axis=1,batch_dims=1)
        action_prob_old=tf.gather(self.actor_old(s),a,axis=1,batch_dims=1)
        ratio=action_prob/action_prob_old
        # The critic returns (batch, 1); make reward/done columns so 1-D
        # inputs cannot broadcast the target into (batch, batch).
        r=tf.reshape(tf.cast(r,'float32'),[-1,1])
        d=tf.reshape(tf.cast(d,'float32'),[-1,1])
        value=self.critic(s)
        value_tar=r+self.gamma*self.critic(next_s)*(1-d)
        TD=value_tar-value
        # The advantage is a constant for the policy term; without the stop
        # the surrogate loss would also push gradients into the critic.
        advantage=tf.stop_gradient(TD)
        sur1=ratio*advantage
        sur2=tf.clip_by_value(ratio,clip_value_min=1-self.clip_eps,clip_value_max=1+self.clip_eps)*advantage
        clip_loss=-tf.math.minimum(sur1,sur2)
        # -p*log(p) is the (non-negative) entropy contribution of the taken
        # action; subtracting it from the loss rewards higher entropy.
        entropy=-action_prob*tf.math.log(action_prob+1e-8)
        clip_loss=clip_loss-self.alpha*entropy
        return tf.reduce_mean(clip_loss)+tf.reduce_mean(TD**2)

    def update_param(self):
        """Sync ``actor_old`` with the current actor."""
        nn.assign_param(self.actor_old.weights, self.actor.weights)
        return


--------------------------------------------------------------------------------
/Note/models/docs_example/RL/keras/pool_network/DQN.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note import nn
 3 | from keras.models import Sequential
 4 | from keras import Model
 5 | import gym
 6 | 
 7 | 
 8 | class Qnet(Model):
 9 |     def __init__(self,state_dim, hidden_dim, action_dim):
10 |         super().__init__()
11 |         self.model = Sequential()
12 |         self.model.add(tf.keras.layers.Dense(hidden_dim, input_shape=(state_dim,), activation='relu'))
13 |         self.model.add(tf.keras.layers.Dense(action_dim))
14 |     
15 |     def __call__(self,x):
16 |         x = self.model(x)
17 |         return x
18 |     
19 |     
class DQN(nn.RL):
    """DQN agent that owns one ``CartPole-v0`` environment per process.

    Args:
        state_dim: size of a state vector.
        hidden_dim: width of the hidden layer.
        action_dim: number of discrete actions.
        processes: number of environments to create.
        gamma: discount factor; the default keeps the former hard-coded 0.98.
    """

    def __init__(self,state_dim,hidden_dim,action_dim,processes,gamma=0.98):
        super().__init__()
        self.q_net=Qnet(state_dim,hidden_dim,action_dim)
        self.target_q_net=Qnet(state_dim,hidden_dim,action_dim)
        self.param=self.q_net.weights
        self.gamma=gamma
        self.env=[gym.make('CartPole-v0') for _ in range(processes)]

    def action(self,s):
        """Return the online network's Q-values for states ``s``."""
        return self.q_net(s)

    def __call__(self,s,a,next_s,r,d):
        """Return the mean squared one-step TD error of a batch of transitions."""
        a=tf.expand_dims(a,axis=1)
        # The gather yields (batch, 1) while reduce_max yields (batch,);
        # subtracting those directly broadcasts to (batch, batch). Flatten
        # every per-sample term to (batch,) before combining them.
        q_value=tf.reshape(tf.gather(self.q_net(s),a,axis=1,batch_dims=1),[-1])
        next_q_value=tf.reduce_max(self.target_q_net(next_s),axis=1)
        r=tf.reshape(tf.cast(r,'float32'),[-1])
        d=tf.reshape(tf.cast(d,'float32'),[-1])
        target=r+self.gamma*next_q_value*(1-d)
        TD=q_value-target
        return tf.reduce_mean(TD**2)

    def update_param(self):
        """Overwrite the target network's weights with the online network's weights."""
        nn.assign_param(self.target_q_net.weights,self.param)
        return
42 | 


--------------------------------------------------------------------------------
/Note/models/docs_example/RL/keras/pool_network/DQN_PR.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note import nn
 3 | from keras.models import Sequential
 4 | from keras import Model
 5 | import gym
 6 | 
 7 | 
 8 | class Qnet(Model):
 9 |     def __init__(self,state_dim, hidden_dim, action_dim):
10 |         super().__init__()
11 |         self.model = Sequential()
12 |         self.model.add(tf.keras.layers.Dense(hidden_dim, input_shape=(state_dim,), activation='relu'))
13 |         self.model.add(tf.keras.layers.Dense(action_dim))
14 |     
15 |     def __call__(self,x):
16 |         x = self.model(x)
17 |         return x
18 |     
19 |     
class DQN(nn.RL):
    """Multi-environment DQN agent that reports TD errors to prioritized replay.

    Args:
        state_dim: size of a state vector.
        hidden_dim: width of the hidden layer.
        action_dim: number of discrete actions.
        processes: number of ``CartPole-v0`` environments to create.
        gamma: discount factor; the default keeps the former hard-coded 0.98.
    """

    def __init__(self,state_dim,hidden_dim,action_dim,processes,gamma=0.98):
        super().__init__()
        self.q_net=Qnet(state_dim,hidden_dim,action_dim)
        self.target_q_net=Qnet(state_dim,hidden_dim,action_dim)
        self.param=self.q_net.weights
        self.gamma=gamma
        self.env=[gym.make('CartPole-v0') for _ in range(processes)]

    def action(self,s):
        """Return the online network's Q-values for states ``s``."""
        return self.q_net(s)

    def __call__(self,s,a,next_s,r,d):
        """Return the mean squared TD error and push the per-sample TD errors to the replay."""
        a=tf.expand_dims(a,axis=1)
        # The gather yields (batch, 1) while reduce_max yields (batch,);
        # subtracting those directly broadcasts to (batch, batch). Flatten
        # every per-sample term to (batch,) so TD holds one error per sample.
        q_value=tf.reshape(tf.gather(self.q_net(s),a,axis=1,batch_dims=1),[-1])
        next_q_value=tf.reduce_max(self.target_q_net(next_s),axis=1)
        r=tf.reshape(tf.cast(r,'float32'),[-1])
        d=tf.reshape(tf.cast(d,'float32'),[-1])
        target=r+self.gamma*next_q_value*(1-d)
        TD=q_value-target
        # self.prioritized_replay is not set in this class; presumably provided by nn.RL - confirm.
        self.prioritized_replay.update_TD(TD)
        return tf.reduce_mean(TD**2)

    def update_param(self):
        """Overwrite the target network's weights with the online network's weights."""
        nn.assign_param(self.target_q_net.weights,self.param)
        return


--------------------------------------------------------------------------------
/Note/models/docs_example/RL/note/DDPG.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note import nn
 3 | import gym
 4 | 
 5 | 
 6 | class actor(nn.Model):
 7 |     def __init__(self,state_dim,hidden_dim,action_dim,action_bound):
 8 |         super().__init__()
 9 |         self.dense1 = nn.dense(hidden_dim, state_dim, activation='relu')
10 |         self.dense2 = nn.dense(action_dim, hidden_dim, activation='tanh')
11 |         self.action_bound=action_bound
12 |     
13 |     def __call__(self,x):
14 |         x = self.dense1(x)
15 |         return self.dense2(x)*self.action_bound
16 | 
17 | 
class critic(nn.Model):
    """Q-network over the concatenation of a state and an action."""

    def __init__(self,state_dim,hidden_dim,action_dim):
        super().__init__()
        self.dense1 = nn.dense(hidden_dim, state_dim+action_dim, activation='relu')
        self.dense2 = nn.dense(action_dim, hidden_dim)

    def __call__(self,x,a):
        """Return the network output for states ``x`` joined with actions ``a`` along axis 1."""
        state_action=tf.concat([x,a],axis=1)
        hidden=self.dense1(state_action)
        return self.dense2(hidden)
28 | 
29 | 
class DDPG(nn.RL):
    """DDPG agent for ``Pendulum-v1`` with online and target actor/critic networks.

    Args:
        hidden_dim: width of the hidden layer of every network.
        sigma: stored on the instance; not read inside this class.
        gamma: discount factor used in the TD target.
        tau: interpolation factor of the soft target update.
    """

    def __init__(self,hidden_dim,sigma,gamma,tau):
        super().__init__()
        self.env=gym.make('Pendulum-v1')
        state_dim=self.env.observation_space.shape[0]
        action_dim=self.env.action_space.shape[0]
        action_bound=self.env.action_space.high[0]
        self.actor=actor(state_dim,hidden_dim,action_dim,action_bound)
        self.critic=critic(state_dim,hidden_dim,action_dim)
        self.target_actor=actor(state_dim,hidden_dim,action_dim,action_bound)
        self.target_critic=critic(state_dim,hidden_dim,action_dim)
        # Start the target networks from the online networks
        # (nn.assign_param presumably copies the second list into the first - confirm).
        nn.assign_param(self.target_actor.param,self.actor.param)
        nn.assign_param(self.target_critic.param,self.critic.param)
        self.param=[self.actor.param,self.critic.param]
        self.sigma=sigma
        self.gamma=gamma
        self.tau=tau

    def action(self,s):
        """Return the online actor's action for states ``s``."""
        return self.actor(s)

    def __call__(self,s,a,next_s,r,d):
        """Return actor loss plus critic loss for one batch of transitions."""
        a=tf.expand_dims(a,axis=1)
        # The critic returns (batch, action_dim). Force reward and done flag
        # into columns so a 1-D (batch,) input cannot broadcast the target
        # into a (batch, batch) matrix; (batch, 1) inputs are unchanged.
        r=tf.reshape(tf.cast(r,'float32'),[-1,1])
        d=tf.reshape(tf.cast(d,'float32'),[-1,1])
        next_q_value=self.target_critic(next_s,self.target_actor(next_s))
        q_target=r+self.gamma*next_q_value*(1-d)
        actor_loss=-tf.reduce_mean(self.critic(s,self.actor(s)))
        critic_loss=tf.reduce_mean((self.critic(s,a)-q_target)**2)
        return actor_loss+critic_loss

    def update_param(self):
        """Soft-update both target networks: target <- (1-tau)*target + tau*online."""
        pairs=((self.target_actor,self.actor),(self.target_critic,self.critic))
        for target_net,online_net in pairs:
            for target_param,param in zip(target_net.param,online_net.param):
                target_param.assign(target_param*(1.0-self.tau)+param*self.tau)
        return


--------------------------------------------------------------------------------
/Note/models/docs_example/RL/note/DQN.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note import nn
 3 | import gym
 4 | 
 5 | 
 6 | class Qnet(nn.Model):
 7 |     def __init__(self,state_dim, hidden_dim, action_dim):
 8 |         super().__init__()
 9 |         self.dense1 = nn.dense(hidden_dim, state_dim, activation='relu')
10 |         self.dense2 = nn.dense(action_dim, hidden_dim)
11 |     
12 |     def __call__(self,x):
13 |         x = self.dense2(self.dense1(x))
14 |         return x
15 |     
16 |     
class DQN(nn.RL):
    """DQN agent on ``CartPole-v0`` with an online and a target Q-network.

    Args:
        state_dim: size of a state vector.
        hidden_dim: width of the hidden layer.
        action_dim: number of discrete actions.
        gamma: discount factor; the default keeps the former hard-coded 0.98.
    """

    def __init__(self,state_dim,hidden_dim,action_dim,gamma=0.98):
        super().__init__()
        self.q_net=Qnet(state_dim,hidden_dim,action_dim)
        self.target_q_net=Qnet(state_dim,hidden_dim,action_dim)
        self.param=self.q_net.param
        self.gamma=gamma
        self.env=gym.make('CartPole-v0')

    def action(self,s):
        """Return the online network's Q-values for states ``s``."""
        return self.q_net(s)

    def __call__(self,s,a,next_s,r,d):
        """Return the mean squared one-step TD error of a batch of transitions."""
        a=tf.expand_dims(a,axis=1)
        # The gather yields (batch, 1) while reduce_max yields (batch,);
        # subtracting those directly broadcasts to (batch, batch). Flatten
        # every per-sample term to (batch,) before combining them.
        q_value=tf.reshape(tf.gather(self.q_net(s),a,axis=1,batch_dims=1),[-1])
        next_q_value=tf.reduce_max(self.target_q_net(next_s),axis=1)
        r=tf.reshape(tf.cast(r,'float32'),[-1])
        d=tf.reshape(tf.cast(d,'float32'),[-1])
        target=r+self.gamma*next_q_value*(1-d)
        TD=q_value-target
        return tf.reduce_mean(TD**2)

    def update_param(self):
        """Overwrite the target network's parameters with the online network's parameters."""
        nn.assign_param(self.target_q_net.param,self.param)
        return
39 | 


--------------------------------------------------------------------------------
/Note/models/docs_example/RL/note/DQN_PR.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note import nn
 3 | import gym
 4 | 
 5 | 
 6 | class Qnet(nn.Model):
 7 |     def __init__(self,state_dim, hidden_dim, action_dim):
 8 |         super().__init__()
 9 |         self.dense1 = nn.dense(hidden_dim, state_dim, activation='relu')
10 |         self.dense2 = nn.dense(action_dim, hidden_dim)
11 |     
12 |     def __call__(self,x):
13 |         x = self.dense2(self.dense1(x))
14 |         return x
15 |     
16 |     
class DQN(nn.RL):
    """DQN agent on ``CartPole-v0`` that reports TD errors to prioritized replay.

    Args:
        state_dim: size of a state vector.
        hidden_dim: width of the hidden layer.
        action_dim: number of discrete actions.
        gamma: discount factor; the default keeps the former hard-coded 0.98.
    """

    def __init__(self,state_dim,hidden_dim,action_dim,gamma=0.98):
        super().__init__()
        self.q_net=Qnet(state_dim,hidden_dim,action_dim)
        self.target_q_net=Qnet(state_dim,hidden_dim,action_dim)
        self.param=self.q_net.param
        self.gamma=gamma
        self.env=gym.make('CartPole-v0')

    def action(self,s):
        """Return the online network's Q-values for states ``s``."""
        return self.q_net(s)

    def __call__(self,s,a,next_s,r,d):
        """Return the mean squared TD error and push the per-sample TD errors to the replay."""
        a=tf.expand_dims(a,axis=1)
        # The gather yields (batch, 1) while reduce_max yields (batch,);
        # subtracting those directly broadcasts to (batch, batch). Flatten
        # every per-sample term to (batch,) so TD holds one error per sample.
        q_value=tf.reshape(tf.gather(self.q_net(s),a,axis=1,batch_dims=1),[-1])
        next_q_value=tf.reduce_max(self.target_q_net(next_s),axis=1)
        r=tf.reshape(tf.cast(r,'float32'),[-1])
        d=tf.reshape(tf.cast(d,'float32'),[-1])
        target=r+self.gamma*next_q_value*(1-d)
        TD=q_value-target
        # self.prioritized_replay is not set in this class; presumably provided by nn.RL - confirm.
        self.prioritized_replay.update_TD(TD)
        return tf.reduce_mean(TD**2)

    def update_param(self):
        """Overwrite the target network's parameters with the online network's parameters."""
        nn.assign_param(self.target_q_net.param,self.param)
        return


--------------------------------------------------------------------------------
/Note/models/docs_example/RL/note/PPO.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note import nn
 3 | import gym
 4 | 
 5 | 
 6 | class actor(nn.Model):
 7 |     def __init__(self,state_dim,hidden_dim,action_dim):
 8 |         super().__init__()
 9 |         self.dense1 = nn.dense(hidden_dim, state_dim, activation='relu')
10 |         self.dense2 = nn.dense(action_dim, hidden_dim)
11 |     
12 |     def __call__(self,x):
13 |         x=self.dense1(x)
14 |         return tf.nn.softmax(self.dense2(x))
15 | 
16 | 
class critic(nn.Model):
    """State-value network: a relu dense layer followed by a single-output dense layer."""

    def __init__(self,state_dim,hidden_dim):
        super().__init__()
        self.dense1 = nn.dense(hidden_dim, state_dim, activation='relu')
        self.dense2 = nn.dense(1, hidden_dim)

    def __call__(self,x):
        """Return the value estimate for the batch of states ``x``."""
        hidden=self.dense1(x)
        value=self.dense2(hidden)
        return value
26 |     
27 |     
class PPO(nn.RL):
    """Clipped-surrogate PPO agent on ``CartPole-v0``.

    Args:
        state_dim: size of a state vector.
        hidden_dim: width of the hidden layers.
        action_dim: number of discrete actions.
        clip_eps: the probability ratio is clipped to [1-clip_eps, 1+clip_eps].
        alpha: weight of the entropy bonus.
        gamma: discount factor; the default keeps the former hard-coded 0.98.
    """

    def __init__(self,state_dim,hidden_dim,action_dim,clip_eps,alpha,gamma=0.98):
        super().__init__()
        self.actor=actor(state_dim,hidden_dim,action_dim)
        self.actor_old=actor(state_dim,hidden_dim,action_dim)
        nn.assign_param(self.actor_old.param,self.actor.param)
        self.critic=critic(state_dim,hidden_dim)
        self.clip_eps=clip_eps
        self.alpha=alpha
        self.gamma=gamma
        self.param=[self.actor.param,self.critic.param]
        self.env=gym.make('CartPole-v0')

    def action(self,s):
        """Return the behaviour policy's (``actor_old``) action probabilities for ``s``."""
        return self.actor_old(s)

    def __call__(self,s,a,next_s,r,d):
        """Return clipped policy loss (with entropy bonus) plus squared TD value loss."""
        a=tf.expand_dims(a,axis=1)
        action_prob=tf.gather(self.actor(s),a,axis=1,batch_dims=1)
        action_prob_old=tf.gather(self.actor_old(s),a,axis=1,batch_dims=1)
        ratio=action_prob/action_prob_old
        # Make reward/done columns so 1-D inputs cannot broadcast against the
        # critic output (assumed (batch, 1) - confirm against nn.dense).
        r=tf.reshape(tf.cast(r,'float32'),[-1,1])
        d=tf.reshape(tf.cast(d,'float32'),[-1,1])
        value=self.critic(s)
        value_tar=r+self.gamma*self.critic(next_s)*(1-d)
        TD=value_tar-value
        # The advantage is a constant for the policy term; without the stop
        # the surrogate loss would also push gradients into the critic.
        advantage=tf.stop_gradient(TD)
        sur1=ratio*advantage
        sur2=tf.clip_by_value(ratio,clip_value_min=1-self.clip_eps,clip_value_max=1+self.clip_eps)*advantage
        clip_loss=-tf.math.minimum(sur1,sur2)
        # -p*log(p) is the (non-negative) entropy contribution of the taken
        # action; subtracting it from the loss rewards higher entropy.
        entropy=-action_prob*tf.math.log(action_prob+1e-8)
        clip_loss=clip_loss-self.alpha*entropy
        return tf.reduce_mean(clip_loss)+tf.reduce_mean(TD**2)

    def update_param(self):
        """Sync ``actor_old`` with the current actor."""
        nn.assign_param(self.actor_old.param, self.actor.param)
        return


--------------------------------------------------------------------------------
/Note/models/docs_example/RL/note/Rainbow.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import numpy as np
 3 | from Note import nn
 4 | import gym
 5 | 
 6 | 
 7 | class NoisyLinear:
 8 |     def __init__(self, out_dim, in_dim, sigma_init=0.017):
 9 |         self.in_dim = in_dim
10 |         self.out_dim = out_dim
11 |         
12 |         self.mu_w = nn.initializer([in_dim, out_dim], ['uniform',-1/np.sqrt(in_dim),1/np.sqrt(in_dim)])
13 |         self.sigma_w = nn.Parameter(tf.ones([in_dim, out_dim])*sigma_init)
14 |         self.mu_b = nn.initializer([out_dim], ['uniform',-1/np.sqrt(in_dim),1/np.sqrt(in_dim)])
15 |         self.sigma_b = nn.Parameter(tf.ones([out_dim])*sigma_init)
16 |         
17 |     def __call__(self, x):
18 |         epsilon_in = self._scale_noise(self.in_dim)
19 |         epsilon_out = self._scale_noise(self.out_dim)
20 |         
21 |         w_noise = tf.multiply(self.sigma_w, tf.einsum('i,j->ij', epsilon_in, epsilon_out))
22 |         b_noise = tf.multiply(self.sigma_b, epsilon_out)
23 |         
24 |         return tf.matmul(x, self.mu_w + w_noise) + (self.mu_b + b_noise)
25 |     
26 |     def _scale_noise(self, size):
27 |         x = tf.random.normal([size])
28 |         return tf.sign(x) * tf.sqrt(tf.abs(x))
29 | 
30 | 
class VAnet(nn.Model):
    """Dueling Q-network built from noisy layers: Q = V + A - mean(A)."""

    def __init__(self,state_dim,hidden_dim,action_dim):
        super().__init__()
        self.fc1=NoisyLinear(hidden_dim,state_dim)
        self.fc_A=NoisyLinear(action_dim,hidden_dim)
        self.fc_V=NoisyLinear(1,hidden_dim)

    def __call__(self,x):
        """Return Q-values for the batch of states ``x``."""
        # Evaluate the shared layer once: each NoisyLinear call draws fresh
        # noise, so calling fc1 separately for A and V would feed the two
        # heads different features (and do the work twice).
        hidden=tf.nn.relu(self.fc1(x))
        A=self.fc_A(hidden)
        V=self.fc_V(hidden)
        Q=V+A-tf.reduce_mean(A,axis=1,keepdims=True)
        return Q
43 |     
44 |     
class Rainbow(nn.RL):
    """Agent on ``CartPole-v0`` with a noisy dueling Q-network, a double-DQN target
    and TD errors reported to prioritized replay.

    Args:
        state_dim: size of a state vector.
        hidden_dim: width of the hidden layer.
        action_dim: number of discrete actions.
        gamma: discount factor; the default keeps the former hard-coded 0.98.
    """

    def __init__(self,state_dim,hidden_dim,action_dim,gamma=0.98):
        super().__init__()
        self.va_net=VAnet(state_dim,hidden_dim,action_dim)
        self.target_q_net=VAnet(state_dim,hidden_dim,action_dim)
        self.param=self.va_net.param
        self.gamma=gamma
        self.genv=gym.make('CartPole-v0')
        # The other example agents in this repository store the environment
        # as ``env``; expose the same object under both names.
        self.env=self.genv

    def action(self,s):
        """Return the online network's Q-values for states ``s``."""
        return self.va_net(s)

    def loss(self,s,a,next_s,r,d):
        """Return the mean squared TD error and push the per-sample TD errors to the replay."""
        a=tf.expand_dims(a,axis=1)
        # Every per-sample term is flattened to (batch,) so that 1-D rewards
        # cannot broadcast against the (batch, 1) gather results.
        q_value=tf.reshape(tf.gather(self.va_net(s),a,axis=1,batch_dims=1),[-1])
        # Double DQN: the online network selects the greedy action for the
        # NEXT state, and the target network evaluates that action.
        max_action=tf.expand_dims(tf.argmax(self.va_net(next_s),axis=1),axis=1)
        next_q_value=tf.reshape(tf.gather(self.target_q_net(next_s),max_action,axis=1,batch_dims=1),[-1])
        r=tf.reshape(tf.cast(r,'float32'),[-1])
        d=tf.reshape(tf.cast(d,'float32'),[-1])
        target=r+self.gamma*next_q_value*(1-d)
        TD=q_value-target
        # self.prioritized_replay is not set in this class; presumably provided by nn.RL - confirm.
        self.prioritized_replay.update_TD(TD)
        return tf.reduce_mean(TD**2)

    def __call__(self,s,a,next_s,r,d):
        """Alias of :meth:`loss`, matching the call-style loss of the other example agents."""
        return self.loss(s,a,next_s,r,d)

    def update_param(self):
        """Overwrite the target network's parameters with the online network's parameters."""
        nn.assign_param(self.target_q_net.param,self.param)
        return


--------------------------------------------------------------------------------
/Note/models/docs_example/RL/note/pool_network/DQN.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note import nn
 3 | import gym
 4 | 
 5 | 
 6 | class Qnet(nn.Model):
 7 |     def __init__(self,state_dim, hidden_dim, action_dim):
 8 |         super().__init__()
 9 |         self.dense1 = nn.dense(hidden_dim, state_dim, activation='relu')
10 |         self.dense2 = nn.dense(action_dim, hidden_dim)
11 |     
12 |     def __call__(self,x):
13 |         x = self.dense2(self.dense1(x))
14 |         return x
15 |     
16 |     
class DQN(nn.RL):
    """DQN agent that owns one ``CartPole-v0`` environment per process.

    Args:
        state_dim: size of a state vector.
        hidden_dim: width of the hidden layer.
        action_dim: number of discrete actions.
        processes: number of environments to create.
        gamma: discount factor; the default keeps the former hard-coded 0.98.
    """

    def __init__(self,state_dim,hidden_dim,action_dim,processes,gamma=0.98):
        super().__init__()
        self.q_net=Qnet(state_dim,hidden_dim,action_dim)
        self.target_q_net=Qnet(state_dim,hidden_dim,action_dim)
        self.param=self.q_net.param
        self.gamma=gamma
        self.env=[gym.make('CartPole-v0') for _ in range(processes)]

    def action(self,s):
        """Return the online network's Q-values for states ``s``."""
        return self.q_net(s)

    def __call__(self,s,a,next_s,r,d):
        """Return the mean squared one-step TD error of a batch of transitions."""
        a=tf.expand_dims(a,axis=1)
        # The gather yields (batch, 1) while reduce_max yields (batch,);
        # subtracting those directly broadcasts to (batch, batch). Flatten
        # every per-sample term to (batch,) before combining them.
        q_value=tf.reshape(tf.gather(self.q_net(s),a,axis=1,batch_dims=1),[-1])
        next_q_value=tf.reduce_max(self.target_q_net(next_s),axis=1)
        r=tf.reshape(tf.cast(r,'float32'),[-1])
        d=tf.reshape(tf.cast(d,'float32'),[-1])
        target=r+self.gamma*next_q_value*(1-d)
        return tf.reduce_mean((q_value-target)**2)

    def update_param(self):
        """Overwrite the target network's parameters with the online network's parameters."""
        nn.assign_param(self.target_q_net.param,self.param)
        return
38 | 


--------------------------------------------------------------------------------
/Note/models/docs_example/RL/note/pool_network/DQN_PR.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note import nn
 3 | import gym
 4 | 
 5 | 
 6 | class Qnet(nn.Model):
 7 |     def __init__(self,state_dim, hidden_dim, action_dim):
 8 |         super().__init__()
 9 |         self.dense1 = nn.dense(hidden_dim, state_dim, activation='relu')
10 |         self.dense2 = nn.dense(action_dim, hidden_dim)
11 |     
12 |     def __call__(self,x):
13 |         x = self.dense2(self.dense1(x))
14 |         return x
15 |     
16 |     
class DQN(nn.RL):
    """Multi-environment DQN agent that reports TD errors to prioritized replay.

    Args:
        state_dim: size of a state vector.
        hidden_dim: width of the hidden layer.
        action_dim: number of discrete actions.
        processes: number of ``CartPole-v0`` environments to create.
        gamma: discount factor; the default keeps the former hard-coded 0.98.
    """

    def __init__(self,state_dim,hidden_dim,action_dim,processes,gamma=0.98):
        super().__init__()
        self.q_net=Qnet(state_dim,hidden_dim,action_dim)
        self.target_q_net=Qnet(state_dim,hidden_dim,action_dim)
        self.param=self.q_net.param
        self.gamma=gamma
        self.env=[gym.make('CartPole-v0') for _ in range(processes)]

    def action(self,s):
        """Return the online network's Q-values for states ``s``."""
        return self.q_net(s)

    def __call__(self,s,a,next_s,r,d):
        """Return the mean squared TD error and push the per-sample TD errors to the replay."""
        a=tf.expand_dims(a,axis=1)
        # The gather yields (batch, 1) while reduce_max yields (batch,);
        # subtracting those directly broadcasts to (batch, batch). Flatten
        # every per-sample term to (batch,) so TD holds one error per sample.
        q_value=tf.reshape(tf.gather(self.q_net(s),a,axis=1,batch_dims=1),[-1])
        next_q_value=tf.reduce_max(self.target_q_net(next_s),axis=1)
        r=tf.reshape(tf.cast(r,'float32'),[-1])
        d=tf.reshape(tf.cast(d,'float32'),[-1])
        target=r+self.gamma*next_q_value*(1-d)
        TD=q_value-target
        # Priorities come from the TD error, not from the bootstrap target.
        # self.prioritized_replay is not set in this class; presumably provided by nn.RL - confirm.
        self.prioritized_replay.update_TD(TD)
        return tf.reduce_mean(TD**2)

    def update_param(self):
        """Overwrite the target network's parameters with the online network's parameters."""
        nn.assign_param(self.target_q_net.param,self.param)
        return


--------------------------------------------------------------------------------
/Note/models/docs_example/RL/note/pool_network/Rainbow.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import numpy as np
 3 | from Note import nn
 4 | import gym
 5 | 
 6 | 
 7 | class NoisyLinear:
 8 |     def __init__(self, out_dim, in_dim, sigma_init=0.017):
 9 |         self.in_dim = in_dim
10 |         self.out_dim = out_dim
11 |         
12 |         self.mu_w = nn.initializer([in_dim, out_dim], ['uniform',-1/np.sqrt(in_dim),1/np.sqrt(in_dim)])
13 |         self.sigma_w = nn.Parameter(tf.ones([in_dim, out_dim])*sigma_init)
14 |         self.mu_b = nn.initializer([out_dim], ['uniform',-1/np.sqrt(in_dim),1/np.sqrt(in_dim)])
15 |         self.sigma_b = nn.Parameter(tf.ones([out_dim])*sigma_init)
16 |         
17 |     def __call__(self, x):
18 |         epsilon_in = self._scale_noise(self.in_dim)
19 |         epsilon_out = self._scale_noise(self.out_dim)
20 |         
21 |         w_noise = tf.multiply(self.sigma_w, tf.einsum('i,j->ij', epsilon_in, epsilon_out))
22 |         b_noise = tf.multiply(self.sigma_b, epsilon_out)
23 |         
24 |         return tf.matmul(x, self.mu_w + w_noise) + (self.mu_b + b_noise)
25 |     
26 |     def _scale_noise(self, size):
27 |         x = tf.random.normal([size])
28 |         return tf.sign(x) * tf.sqrt(tf.abs(x))
29 | 
30 | 
class VAnet(nn.Model):
    """Dueling Q-network built from noisy layers: Q = V + A - mean(A)."""

    def __init__(self,state_dim,hidden_dim,action_dim):
        super().__init__()
        self.fc1=NoisyLinear(hidden_dim,state_dim)
        self.fc_A=NoisyLinear(action_dim,hidden_dim)
        self.fc_V=NoisyLinear(1,hidden_dim)

    def __call__(self,x):
        """Return Q-values for the batch of states ``x``."""
        # Evaluate the shared layer once: each NoisyLinear call draws fresh
        # noise, so calling fc1 separately for A and V would feed the two
        # heads different features (and do the work twice).
        hidden=tf.nn.relu(self.fc1(x))
        A=self.fc_A(hidden)
        V=self.fc_V(hidden)
        Q=V+A-tf.reduce_mean(A,axis=1,keepdims=True)
        return Q
43 |     
44 |     
class Rainbow(nn.RL):
    """Multi-environment agent with a noisy dueling Q-network, a double-DQN target
    and TD errors reported to prioritized replay.

    Args:
        state_dim: size of a state vector.
        hidden_dim: width of the hidden layer.
        action_dim: number of discrete actions.
        processes: number of ``CartPole-v0`` environments to create.
        gamma: discount factor; the default keeps the former hard-coded 0.98.
    """

    def __init__(self,state_dim,hidden_dim,action_dim,processes,gamma=0.98):
        super().__init__()
        self.va_net=VAnet(state_dim,hidden_dim,action_dim)
        self.target_q_net=VAnet(state_dim,hidden_dim,action_dim)
        self.param=self.va_net.param
        self.gamma=gamma
        self.genv=[gym.make('CartPole-v0') for _ in range(processes)]
        # The other example agents in this repository store the environments
        # as ``env``; expose the same list under both names.
        self.env=self.genv

    def action(self,s):
        """Return the online network's Q-values for states ``s``."""
        return self.va_net(s)

    def loss(self,s,a,next_s,r,d):
        """Return the mean squared TD error and push the per-sample TD errors to the replay."""
        a=tf.expand_dims(a,axis=1)
        # Every per-sample term is flattened to (batch,) so that 1-D rewards
        # cannot broadcast against the (batch, 1) gather results.
        q_value=tf.reshape(tf.gather(self.va_net(s),a,axis=1,batch_dims=1),[-1])
        # Double DQN: the online network selects the greedy action for the
        # NEXT state, and the target network evaluates that action.
        max_action=tf.expand_dims(tf.argmax(self.va_net(next_s),axis=1),axis=1)
        next_q_value=tf.reshape(tf.gather(self.target_q_net(next_s),max_action,axis=1,batch_dims=1),[-1])
        r=tf.reshape(tf.cast(r,'float32'),[-1])
        d=tf.reshape(tf.cast(d,'float32'),[-1])
        target=r+self.gamma*next_q_value*(1-d)
        TD=q_value-target
        # self.prioritized_replay is not set in this class; presumably provided by nn.RL - confirm.
        self.prioritized_replay.update_TD(TD)
        return tf.reduce_mean(TD**2)

    def __call__(self,s,a,next_s,r,d):
        """Alias of :meth:`loss`, matching the call-style loss of the other example agents."""
        return self.loss(s,a,next_s,r,d)

    def update_param(self):
        """Overwrite the target network's parameters with the online network's parameters."""
        nn.assign_param(self.target_q_net.param,self.param)
        return


--------------------------------------------------------------------------------
/Note/models/docs_example/RL/pytorch/DQN.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from Note import nn
 3 | import gym
 4 | import torch.nn.functional as F
 5 | 
 6 | 
 7 | class Qnet(torch.nn.Module):
 8 |     def __init__(self,state_dim,hidden_dim,action_dim):
 9 |         super(Qnet,self).__init__()
10 |         self.fc1=torch.nn.Linear(state_dim,hidden_dim)
11 |         self.fc2=torch.nn.Linear(hidden_dim,action_dim)
12 |     
13 |     def forward(self,x):
14 |         x=F.relu(self.fc1(x))
15 |         return self.fc2(x)
16 |     
17 |     
class DQN(nn.RL_pytorch):
    """PyTorch DQN agent on ``CartPole-v0`` with an online and a target Q-network.

    Args:
        state_dim: size of a state vector.
        hidden_dim: width of the hidden layer.
        action_dim: number of discrete actions.
        gamma: discount factor; the default keeps the former hard-coded 0.98.
    """

    def __init__(self,state_dim,hidden_dim,action_dim,gamma=0.98):
        super().__init__()
        # Use the GPU when one is available.
        self.device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.q_net=Qnet(state_dim,hidden_dim,action_dim).to(self.device)
        self.target_q_net=Qnet(state_dim,hidden_dim,action_dim).to(self.device)
        self.param=self.q_net.parameters()
        self.gamma=gamma
        self.env=gym.make('CartPole-v0') #create environment

    def action(self,s):
        """Return the online network's Q-values for states ``s``."""
        return self.q_net(s)

    def __call__(self,s,a,next_s,r,d): #loss function,kernel uses it to calculate loss.
        """Return the MSE between the chosen actions' Q-values and the one-step TD targets."""
        s=torch.tensor(s,dtype=torch.float).to(self.device)
        a=torch.tensor(a,dtype=torch.int64).view(-1,1).to(self.device)
        next_s=torch.tensor(next_s,dtype=torch.float).to(self.device)
        r=torch.tensor(r,dtype=torch.float).reshape(-1).to(self.device)
        d=torch.tensor(d,dtype=torch.float).reshape(-1).to(self.device)
        # gather returns (batch, 1) while max(1)[0] returns (batch,); squeeze
        # the Q-values so mse_loss compares matching (batch,) tensors instead
        # of broadcasting them to (batch, batch).
        q_value=self.q_net(s).gather(1,a).squeeze(1)
        next_q_value=self.target_q_net(next_s).max(1)[0]
        target=r+self.gamma*next_q_value*(1-d)
        return F.mse_loss(q_value,target)

    def update_param(self): #update function,kernel uses it to update parameter.
        """Copy the online network's state dict into the target network."""
        self.target_q_net.load_state_dict(self.q_net.state_dict())
        return


--------------------------------------------------------------------------------
/Note/models/docs_example/RL/pytorch/DoubleDQN.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from Note import nn
 3 | import gym
 4 | import torch.nn.functional as F
 5 | 
 6 | 
 7 | class Qnet(torch.nn.Module):
 8 |     def __init__(self,state_dim,hidden_dim,action_dim):
 9 |         super(Qnet,self).__init__()
10 |         self.fc1=torch.nn.Linear(state_dim,hidden_dim)
11 |         self.fc2=torch.nn.Linear(hidden_dim,action_dim)
12 |     
13 |     def forward(self,x):
14 |         x=F.relu(self.fc1(x))
15 |         return self.fc2(x)
16 |     
17 |     
class DoubleDQN(nn.RL_pytorch):
    """Double DQN example agent for the CartPole-v0 environment.

    The online network picks the next action and the target network
    evaluates it.
    """

    def __init__(self,state_dim,hidden_dim,action_dim):
        super().__init__()
        self.device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.q_net=Qnet(state_dim,hidden_dim,action_dim).to(self.device)
        self.target_q_net=Qnet(state_dim,hidden_dim,action_dim).to(self.device)
        self.param=self.q_net.parameters()
        self.env=gym.make('CartPole-v0')

    def action(self,s):
        """Return the online network's output for ``s``."""
        return self.q_net(s)

    def __call__(self,s,a,next_s,r,d):
        """Return the mean squared TD error of a batch of transitions.

        Args:
            s: batch of states.
            a: batch of integer action indices.
            next_s: batch of next states.
            r: batch of rewards.
            d: batch of done flags (1 where the episode ended).
        """
        s=torch.tensor(s,dtype=torch.float).to(self.device)
        a=torch.tensor(a,dtype=torch.int64).view(-1,1).to(self.device)
        next_s=torch.tensor(next_s,dtype=torch.float).to(self.device)
        r=torch.tensor(r,dtype=torch.float).view(-1,1).to(self.device)
        d=torch.tensor(d,dtype=torch.float).view(-1,1).to(self.device)
        q_value=self.q_net(s).gather(1,a)
        # Action selection by the online net, evaluation by the target net.
        max_action=self.q_net(next_s).max(1)[1].view(-1,1)
        next_q_value=self.target_q_net(next_s).gather(1,max_action)
        target=r+0.98*next_q_value*(1-d)
        return F.mse_loss(q_value,target)

    def update_param(self):
        """Copy the online network's weights into the target network."""
        # The online network is stored as ``q_net``; no ``nn`` attribute is
        # ever assigned in this class.
        self.target_q_net.load_state_dict(self.q_net.state_dict())
        return


--------------------------------------------------------------------------------
/Note/models/docs_example/RL/pytorch/DuelingDQN.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from Note import nn
 3 | import gym
 4 | import torch.nn.functional as F
 5 | 
 6 | 
 7 | class VAnet(torch.nn.Module):
 8 |     def __init__(self,state_dim,hidden_dim,action_dim):
 9 |         super(VAnet,self).__init__()
10 |         self.fc1=torch.nn.Linear(state_dim,hidden_dim)
11 |         self.fc_A=torch.nn.Linear(hidden_dim,action_dim)
12 |         self.fc_V=torch.nn.Linear(hidden_dim,1)
13 |     
14 |     def forward(self,x):
15 |         A=self.fc_A(F.relu(self.fc1(x)))
16 |         V=self.fc_V(F.relu(self.fc1(x)))
17 |         Q=V+A-A.mean(1).view(-1,1)
18 |         return Q
19 |     
20 |     
class DuelingDQN(nn.RL_pytorch):
    """Dueling DQN example agent for the CartPole-v0 environment."""

    def __init__(self,state_dim,hidden_dim,action_dim):
        super().__init__()
        self.device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.va_net=VAnet(state_dim,hidden_dim,action_dim).to(self.device)
        self.target_q_net=VAnet(state_dim,hidden_dim,action_dim).to(self.device)
        # The online network is ``va_net``; no ``nn`` attribute is assigned here.
        self.optimizer=torch.optim.Adam(self.va_net.parameters(),lr=2e-3)
        # ``genv`` is kept for existing users; ``env`` is the attribute name
        # used by the other example agents.
        self.genv=gym.make('CartPole-v0')
        self.env=self.genv

    def action(self,s):
        """Return the online network's output for ``s``."""
        return self.va_net(s)

    def loss(self,s,a,next_s,r,d):
        """Return the mean squared TD error of a batch of transitions.

        Args:
            s: batch of states.
            a: batch of integer action indices.
            next_s: batch of next states.
            r: batch of rewards.
            d: batch of done flags (1 where the episode ended).
        """
        s=torch.tensor(s,dtype=torch.float).to(self.device)
        a=torch.tensor(a,dtype=torch.int64).view(-1,1).to(self.device)
        next_s=torch.tensor(next_s,dtype=torch.float).to(self.device)
        r=torch.tensor(r,dtype=torch.float).view(-1,1).to(self.device)
        d=torch.tensor(d,dtype=torch.float).view(-1,1).to(self.device)
        q_value=self.va_net(s).gather(1,a)
        next_q_value=self.target_q_net(next_s).max(1)[0].view(-1,1)
        target=r+0.98*next_q_value*(1-d)
        return F.mse_loss(q_value,target)

    def update_param(self):
        """Copy the online network's weights into the target network."""
        self.target_q_net.load_state_dict(self.va_net.state_dict())
        return


--------------------------------------------------------------------------------
/Note/models/docs_example/RL/pytorch/pool_network/DQN.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from Note import nn
 3 | import gym
 4 | import torch.nn.functional as F
 5 | 
 6 | 
 7 | class Qnet(torch.nn.Module):
 8 |     def __init__(self,state_dim,hidden_dim,action_dim):
 9 |         super(Qnet,self).__init__()
10 |         self.fc1=torch.nn.Linear(state_dim,hidden_dim)
11 |         self.fc2=torch.nn.Linear(hidden_dim,action_dim)
12 |     
13 |     def forward(self,x):
14 |         x=F.relu(self.fc1(x))
15 |         return self.fc2(x)
16 |     
17 |     
class DQN(nn.RL_pytorch):
    """DQN example agent for CartPole-v0 that holds one environment per process."""

    def __init__(self,state_dim,hidden_dim,action_dim,processes):
        super().__init__()
        self.device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.q_net=Qnet(state_dim,hidden_dim,action_dim).to(self.device)
        self.target_q_net=Qnet(state_dim,hidden_dim,action_dim).to(self.device)
        self.param=self.q_net.parameters()
        self.env=[gym.make('CartPole-v0') for _ in range(processes)] #create environment

    def action(self,s):
        """Return the online network's output for ``s``."""
        return self.q_net(s)

    def __call__(self,s,a,next_s,r,d): #loss function,kernel uses it to calculate loss.
        """Return the mean squared TD error of a batch of transitions.

        Args:
            s: batch of states.
            a: batch of integer action indices.
            next_s: batch of next states.
            r: batch of rewards.
            d: batch of done flags (1 where the episode ended).
        """
        s=torch.tensor(s,dtype=torch.float).to(self.device)
        a=torch.tensor(a,dtype=torch.int64).view(-1,1).to(self.device)
        next_s=torch.tensor(next_s,dtype=torch.float).to(self.device)
        # r, d and next_q_value are all shaped (batch,1) so that the target
        # lines up with q_value; a (batch,) target would silently broadcast
        # against the (batch,1) prediction inside mse_loss.
        r=torch.tensor(r,dtype=torch.float).view(-1,1).to(self.device)
        d=torch.tensor(d,dtype=torch.float).view(-1,1).to(self.device)
        q_value=self.q_net(s).gather(1,a)
        next_q_value=self.target_q_net(next_s).max(1)[0].view(-1,1)
        target=r+0.98*next_q_value*(1-d)
        return F.mse_loss(q_value,target)

    def update_param(self): #update function,kernel uses it to update parameter.
        """Copy the online network's weights into the target network."""
        self.target_q_net.load_state_dict(self.q_net.state_dict())
        return


--------------------------------------------------------------------------------
/Note/models/docs_example/RL/pytorch/pool_network/DoubleDQN.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from Note import nn
 3 | import gym
 4 | import torch.nn.functional as F
 5 | 
 6 | 
 7 | class Qnet(torch.nn.Module):
 8 |     def __init__(self,state_dim,hidden_dim,action_dim):
 9 |         super(Qnet,self).__init__()
10 |         self.fc1=torch.nn.Linear(state_dim,hidden_dim)
11 |         self.fc2=torch.nn.Linear(hidden_dim,action_dim)
12 |     
13 |     def forward(self,x):
14 |         x=F.relu(self.fc1(x))
15 |         return self.fc2(x)
16 |     
17 |     
class DoubleDQN(nn.RL_pytorch):
    """Double DQN example agent for CartPole-v0 that holds one environment per process.

    The online network picks the next action and the target network
    evaluates it.
    """

    def __init__(self,state_dim,hidden_dim,action_dim,processes):
        super().__init__()
        self.device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.q_net=Qnet(state_dim,hidden_dim,action_dim).to(self.device)
        self.target_q_net=Qnet(state_dim,hidden_dim,action_dim).to(self.device)
        self.param=self.q_net.parameters()
        self.env=[gym.make('CartPole-v0') for _ in range(processes)]

    def action(self,s):
        """Return the online network's output for ``s``."""
        return self.q_net(s)

    def __call__(self,s,a,next_s,r,d):
        """Return the mean squared TD error of a batch of transitions.

        Args:
            s: batch of states.
            a: batch of integer action indices.
            next_s: batch of next states.
            r: batch of rewards.
            d: batch of done flags (1 where the episode ended).
        """
        s=torch.tensor(s,dtype=torch.float).to(self.device)
        a=torch.tensor(a,dtype=torch.int64).view(-1,1).to(self.device)
        next_s=torch.tensor(next_s,dtype=torch.float).to(self.device)
        r=torch.tensor(r,dtype=torch.float).view(-1,1).to(self.device)
        d=torch.tensor(d,dtype=torch.float).view(-1,1).to(self.device)
        q_value=self.q_net(s).gather(1,a)
        # Action selection by the online net, evaluation by the target net.
        max_action=self.q_net(next_s).max(1)[1].view(-1,1)
        next_q_value=self.target_q_net(next_s).gather(1,max_action)
        target=r+0.98*next_q_value*(1-d)
        return F.mse_loss(q_value,target)

    def update_param(self):
        """Copy the online network's weights into the target network."""
        # The online network is stored as ``q_net``; no ``nn`` attribute is
        # ever assigned in this class.
        self.target_q_net.load_state_dict(self.q_net.state_dict())
        return


--------------------------------------------------------------------------------
/Note/models/docs_example/RL/pytorch/pool_network/DuelingDQN.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from Note import nn
 3 | import gym
 4 | import torch.nn.functional as F
 5 | 
 6 | 
 7 | class VAnet(torch.nn.Module):
 8 |     def __init__(self,state_dim,hidden_dim,action_dim):
 9 |         super(VAnet,self).__init__()
10 |         self.fc1=torch.nn.Linear(state_dim,hidden_dim)
11 |         self.fc_A=torch.nn.Linear(hidden_dim,action_dim)
12 |         self.fc_V=torch.nn.Linear(hidden_dim,1)
13 |     
14 |     def forward(self,x):
15 |         A=self.fc_A(F.relu(self.fc1(x)))
16 |         V=self.fc_V(F.relu(self.fc1(x)))
17 |         Q=V+A-A.mean(1).view(-1,1)
18 |         return Q
19 |     
20 |     
class DuelingDQN(nn.RL_pytorch):
    """Dueling DQN example agent for CartPole-v0 that holds one environment per process."""

    def __init__(self,state_dim,hidden_dim,action_dim,processes):
        super().__init__()
        self.device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.va_net=VAnet(state_dim,hidden_dim,action_dim).to(self.device)
        self.target_q_net=VAnet(state_dim,hidden_dim,action_dim).to(self.device)
        # The online network is ``va_net``; no ``nn`` attribute is assigned here.
        self.optimizer=torch.optim.Adam(self.va_net.parameters(),lr=2e-3)
        self.env=[gym.make('CartPole-v0') for _ in range(processes)]

    def action(self,s):
        """Return the online network's output for ``s``."""
        return self.va_net(s)

    def loss(self,s,a,next_s,r,d):
        """Return the mean squared TD error of a batch of transitions.

        Args:
            s: batch of states.
            a: batch of integer action indices.
            next_s: batch of next states.
            r: batch of rewards.
            d: batch of done flags (1 where the episode ended).
        """
        s=torch.tensor(s,dtype=torch.float).to(self.device)
        a=torch.tensor(a,dtype=torch.int64).view(-1,1).to(self.device)
        next_s=torch.tensor(next_s,dtype=torch.float).to(self.device)
        r=torch.tensor(r,dtype=torch.float).view(-1,1).to(self.device)
        d=torch.tensor(d,dtype=torch.float).view(-1,1).to(self.device)
        q_value=self.va_net(s).gather(1,a)
        next_q_value=self.target_q_net(next_s).max(1)[0].view(-1,1)
        target=r+0.98*next_q_value*(1-d)
        return F.mse_loss(q_value,target)

    def update_param(self):
        """Copy the online network's weights into the target network."""
        self.target_q_net.load_state_dict(self.va_net.state_dict())
        return


--------------------------------------------------------------------------------
/Note/models/tf/VGG19.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note import nn
 3 | 
 4 | 
 5 | class VGG19(nn.Model):
 6 |     def __init__(self,include_top=True,pooling=None,classes=1000):
 7 |         super().__init__()
 8 |         self.include_top=include_top
 9 |         self.pooling=pooling
10 |         self.classes=classes
11 |         
12 |         self.layers=nn.Sequential()
13 |         # Block 1
14 |         self.layers.add(nn.conv2d(64,(3,3),3,activation="relu", padding="SAME"))
15 |         self.layers.add(nn.conv2d(64,(3,3),activation="relu", padding="SAME"))
16 |         self.layers.add(nn.max_pool2d((2, 2), strides=(2, 2), padding='VALID'))
17 |         # Block 2
18 |         self.layers.add(nn.conv2d(128,(3,3),activation="relu", padding="SAME"))
19 |         self.layers.add(nn.conv2d(128,(3,3),activation="relu", padding="SAME"))
20 |         self.layers.add(nn.max_pool2d((2, 2), strides=(2, 2), padding='VALID'))
21 |         # Block 3
22 |         self.layers.add(nn.conv2d(256,(3,3),activation="relu", padding="SAME"))
23 |         self.layers.add(nn.conv2d(256,(3,3),activation="relu", padding="SAME"))
24 |         self.layers.add(nn.conv2d(256,(3,3),activation="relu", padding="SAME"))
25 |         self.layers.add(nn.conv2d(256,(3,3),activation="relu", padding="SAME"))
26 |         self.layers.add(nn.max_pool2d((2, 2), strides=(2, 2), padding='VALID'))
27 |         # Block 4
28 |         self.layers.add(nn.conv2d(512,(3,3),activation="relu", padding="SAME"))
29 |         self.layers.add(nn.conv2d(512,(3,3),activation="relu", padding="SAME"))
30 |         self.layers.add(nn.conv2d(512,(3,3),activation="relu", padding="SAME"))
31 |         self.layers.add(nn.conv2d(512,(3,3),activation="relu", padding="SAME"))
32 |         self.layers.add(nn.max_pool2d((2, 2), strides=(2, 2), padding='VALID'))
33 |         # Block 5
34 |         self.layers.add(nn.conv2d(512,(3,3),activation="relu", padding="SAME"))
35 |         self.layers.add(nn.conv2d(512,(3,3),activation="relu", padding="SAME"))
36 |         self.layers.add(nn.conv2d(512,(3,3),activation="relu", padding="SAME"))
37 |         self.layers.add(nn.conv2d(512,(3,3),activation="relu", padding="SAME"))
38 |         self.layers.add(nn.max_pool2d((2, 2), strides=(2, 2), padding='VALID'))
39 |         
40 |         self.flatten=nn.flatten
41 |         self.dense1=nn.dense(4096,25088,activation='relu')
42 |         self.dense2=nn.dense(4096,self.dense1.output_size,activation='relu')
43 |         self.head=self.dense(self.classes,self.dense2.output_size)
44 |     
45 |     
46 |     def __call__(self,data):
47 |         x=self.layers(data)
48 |         if self.include_top:
49 |             x=self.flatten(x)
50 |             x=self.dense1(x)
51 |             x=self.dense2(x)
52 |             x=self.head(x)
53 |         else:
54 |             if self.pooling=="avg":
55 |                 data = tf.math.reduce_mean(data, axis=[1, 2])
56 |             elif self.pooling=="max":
57 |                 data = tf.math.reduce_max(data, axis=[1, 2])
58 |         return x


--------------------------------------------------------------------------------
/Note/nn/Sequential.py:
--------------------------------------------------------------------------------
 1 | class Sequential:
 2 |     def __init__(self):
 3 |         self.layer=[]
 4 |         self.param=[]
 5 |         self.saved_data=[]
 6 |         self.save_data_flag=[]
 7 |         self.use_data_flag=[]
 8 |         self.save_data_count=0
 9 |         self.output_size=None
10 |         self.train_flag=True
11 |     
12 |     
13 |     def add(self,layer,save_data=False,use_data=False):
14 |         if type(layer)!=list:
15 |             if save_data==True:
16 |                 self.save_data_count+=1
17 |             if use_data==True and hasattr(layer,'save_data_count'):
18 |                 layer.save_data_count=self.save_data_count
19 |             if use_data==True:
20 |                 self.save_data_count=0
21 |             self.layer.append(layer)
22 |             if hasattr(layer,'param'):
23 |                 self.param.extend(layer.param)
24 |             if hasattr(layer,'output_size'):
25 |                 self.output_size=layer.output_size
26 |             self.save_data_flag.append(save_data)
27 |             self.use_data_flag.append(use_data)
28 |         else:
29 |             for layer in layer:
30 |                 self.layer.append(layer)
31 |                 if hasattr(layer,'param'):
32 |                     self.param.extend(layer.param)
33 |                 if hasattr(layer,'output_size'):
34 |                     self.output_size=layer.output_size
35 |         return
36 |     
37 |     
38 |     def __call__(self,data,training=True):
39 |         for i,layer in enumerate(self.layer):
40 |             if not hasattr(layer,'train_flag'):
41 |                 if len(self.use_data_flag)==0 or self.use_data_flag[i]==False:
42 |                     data=layer(data)
43 |                 else:
44 |                     if hasattr(layer,'save_data_count'):
45 |                         data=layer(self.saved_data)
46 |                     else:
47 |                         data=layer(data,self.saved_data.pop(0))
48 |             else:
49 |                 if len(self.use_data_flag)==0 or self.use_data_flag[i]==False:
50 |                     data=layer(data,training)
51 |                 else:
52 |                     if hasattr(layer,'save_data_count'):
53 |                         data=layer(self.saved_data,training)
54 |                     else:
55 |                         data=layer(data,self.saved_data.pop(0),training)
56 |             if len(self.save_data_flag)>0 and self.save_data_flag[i]==True:
57 |                 self.saved_data.append(data)
58 |         return data
59 | 


--------------------------------------------------------------------------------
/Note/nn/assign_param.py:
--------------------------------------------------------------------------------
 1 | from tensorflow.python.ops import state_ops
 2 | from tensorflow.python.util import nest
 3 | 
 4 | 
 5 | def assign_param(param1,param2):
 6 |     parameter_flat1=nest.flatten(param1)
 7 |     parameter_flat2=nest.flatten(param2)
 8 |     for i in range(len(parameter_flat1)):
 9 |         state_ops.assign(parameter_flat1[i],parameter_flat2[i])
10 |     return
11 | 


--------------------------------------------------------------------------------
/Note/nn/coalesce_sparse.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | def coalesce_sparse(sp: tf.SparseTensor) -> tf.SparseTensor:
 5 |     dense_shape = tf.cast(sp.dense_shape, tf.int64)
 6 |     multipliers = tf.concat([
 7 |         tf.math.cumprod(dense_shape[1:], reverse=False),
 8 |         tf.constant([1], dtype=tf.int64)
 9 |     ], axis=0)
10 |     linear_idx = tf.reduce_sum(sp.indices * multipliers, axis=1)
11 |     unique_idx, segment_ids = tf.unique(linear_idx)
12 |     summed_vals = tf.math.unsorted_segment_sum(
13 |         sp.values, segment_ids, tf.shape(unique_idx)[0]
14 |     )
15 |     unraveled = tf.unravel_index(unique_idx, sp.dense_shape)
16 |     new_indices = tf.stack(unraveled, axis=1)
17 |     return tf.SparseTensor(new_indices, summed_vals, sp.dense_shape)


--------------------------------------------------------------------------------
/Note/nn/conv2d_func.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note import nn
 3 | 
 4 | 
 5 | def conv2d_func(input, weight, bias=None, strides=1, padding=0, dilations=1, groups=1):
 6 |     if not isinstance(padding,str):
 7 |         x = nn.zeropadding2d(padding=padding)(input)
 8 |         padding = 'VALID'
 9 |     if groups == 1:
10 |         if bias:
11 |             x = tf.nn.conv2d(x, weight, strides, padding, dilations=dilations) + bias
12 |         else:
13 |             x = tf.nn.conv2d(x, weight, strides, padding, dilations=dilations)
14 |     else:
15 |         input_groups = tf.split(input, num_or_size_splits=groups, axis=-1)
16 |         weight_groups = tf.split(weight, num_or_size_splits=groups, axis=-1)
17 |         output_groups = []
18 |         for i in range(groups):
19 |             x = tf.nn.conv2d(input_groups[i], weight_groups[i], strides, padding, dilations=dilations)
20 |             output_groups.append(x)
21 |         x = tf.concat(output_groups, axis=-1)
22 |         if bias:
23 |             x = x + bias
24 |     return x


--------------------------------------------------------------------------------
/Note/nn/cosine_similarity.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import numpy as np
 3 | 
 4 | def cosine_similarity(x1, x2, axis=1, eps=1e-8):
 5 |     w12 = tf.reduce_sum(tf.multiply(x1, x2), axis=axis)
 6 |     w1 = tf.reduce_sum(tf.multiply(x1, x1), axis=axis)
 7 |     w2 = tf.reduce_sum(tf.multiply(x2, x2), axis=axis)
 8 |     n12 = tf.sqrt(clip(w1 * w2, eps * eps))
 9 |     cos_sim = w12 / n12
10 |     return cos_sim
11 | 
def clip(x, min):
    """Clip ``x`` from below at ``min`` and from above at a dtype-dependent bound.

    The upper bound is chosen from ``x.dtype`` (int32, int64, float16);
    every other dtype uses the float32 maximum.
    """
    upper_bounds = {
        tf.int32: np.iinfo(np.int32).max - 2**7,
        tf.int64: np.iinfo(np.int64).max - 2**39,
        tf.float16: float(np.finfo(np.float16).max),
    }
    fallback = float(np.finfo(np.float32).max)
    upper = upper_bounds.get(x.dtype, fallback)
    return tf.clip_by_value(x, min, upper)


--------------------------------------------------------------------------------
/Note/nn/create_additive_causal_mask.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | 
def create_additive_causal_mask(N, dtype = tf.float32):
    """Return an ``N x N`` additive mask: ``-1e9`` where column > row, else 0."""
    positions = tf.range(N)
    is_future = tf.expand_dims(positions, 1) < tf.expand_dims(positions, 0)
    # usually inf but 1e9 is as good and softmax(full(1e9)) != nan
    # TODO: Should replace this with finfo(dtype).min
    return tf.cast(is_future, dtype) * -1e9


--------------------------------------------------------------------------------
/Note/nn/gather_mm.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | def gather_mm(a, b, idx_b):
 4 |     """
 5 |     Gather data according to the given indices and perform matrix multiplication.
 6 | 
 7 |     Parameters
 8 |     ----------
 9 |     a : tf.Tensor
10 |         A 3-D tensor of shape (N, M, D1) or a 2-D tensor of shape (N, D1)
11 |     b : tf.Tensor
12 |         A 3-D tensor of shape (R, D1, D2)
13 |     idx_b : tf.Tensor, optional
14 |         A 1-D integer tensor of shape (N,)
15 | 
16 |     Returns
17 |     -------
18 |     tf.Tensor
19 |         The output dense matrix of shape (N, M, D2) if a is 3-D, or (N, D2) if a is 2-D
20 |     """
21 |     # Gather the appropriate slices from b according to idx_b
22 |     gathered_b = tf.gather(b, idx_b)
23 |     
24 |     # If a is 2-D, expand its dimensions to 3-D for consistent batch matrix multiplication
25 |     if len(a.shape) == 2:
26 |         a = tf.expand_dims(a, axis=1)  # Shape becomes (N, 1, D1)
27 |         expanded = True
28 |     else:
29 |         expanded = False
30 | 
31 |     # Perform the batch matrix multiplication
32 |     result = tf.einsum('nij,njk->nik', a, gathered_b)
33 |     
34 |     # If a was originally 2-D, squeeze the extra dimension
35 |     if expanded:
36 |         result = tf.squeeze(result, axis=1)  # Shape becomes (N, D2)
37 |     
38 |     return result
39 | 


--------------------------------------------------------------------------------
/Note/nn/helpers.py:
--------------------------------------------------------------------------------
 1 | """ Layer/Module Helpers
 2 | 
 3 | Hacked together by / Copyright 2020 Ross Wightman
 4 | """
 5 | from itertools import repeat
 6 | import collections.abc
 7 | 
 8 | 
 9 | # From PyTorch internals
def _ntuple(n):
    """Build a converter that turns its argument into a tuple.

    Non-string iterables are converted with ``tuple``; anything else is
    repeated ``n`` times.
    """
    def parse(x):
        is_sequence_like = isinstance(x, collections.abc.Iterable)
        if is_sequence_like and not isinstance(x, str):
            return tuple(x)
        return (x,) * n
    return parse


to_1tuple = _ntuple(1)
to_2tuple = _ntuple(2)
to_3tuple = _ntuple(3)
to_4tuple = _ntuple(4)
to_ntuple = _ntuple
23 | 
24 | 
def make_divisible(v, divisor=8, min_value=None, round_limit=.9):
    """Round ``v`` to a multiple of ``divisor``, never below ``min_value``.

    ``min_value`` falls back to ``divisor`` when it is falsy. If the result
    is below ``round_limit * v``, one more ``divisor`` is added.
    """
    lower_bound = min_value or divisor
    rounded = int(v + divisor / 2) // divisor * divisor
    result = max(lower_bound, rounded)
    # Make sure that round down does not go down by more than 10%.
    if result < round_limit * v:
        result += divisor
    return result
32 | 
33 | 
def extend_tuple(x, n):
    """Return ``x`` as a tuple of length ``n``.

    Scalars are wrapped in a one-element tuple. Shorter inputs are padded
    with their last value; longer inputs are truncated to ``n`` items.
    """
    values = tuple(x) if isinstance(x, (tuple, list)) else (x,)
    missing = n - len(values)
    if missing > 0:
        return values + (values[-1],) * missing
    return values[:n]
44 | 


--------------------------------------------------------------------------------
/Note/nn/interpolate.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | def interpolate(input, size=None, scale_factor=None, recompute_scale_factor=False, mode='nearest', align_corners=False, antialias=False):
 4 |     # Get input shape
 5 |     input_shape = tf.shape(input)
 6 |     
 7 |     # Compute the new size
 8 |     if size is None and scale_factor is not None:
 9 |         # Compute new size based on scale_factor
10 |         new_size = tf.cast(input_shape[1:3], tf.float32) * scale_factor
11 |     elif size is not None:
12 |         # Use provided size
13 |         new_size = tf.cast(size, tf.float32)
14 |     else:
15 |         raise ValueError("Either size or scale_factor must be defined.")
16 |     
17 |     if recompute_scale_factor:
18 |         if scale_factor is None:
19 |             raise ValueError("scale_factor must be defined if recompute_scale_factor is True.")
20 |         # Recompute the scale factor based on the new size
21 |         scale_factor_height = new_size[0] / tf.cast(input_shape[1], tf.float32)
22 |         scale_factor_width = new_size[1] / tf.cast(input_shape[2], tf.float32)
23 |         new_size = tf.stack([tf.cast(tf.cast(input_shape[1], tf.float32) * scale_factor_height, tf.float32),
24 |                              tf.cast(tf.cast(input_shape[2], tf.float32) * scale_factor_width, tf.float32)])
25 | 
26 |     new_size = tf.cast(new_size, tf.int32)
27 |     
28 |     # Perform the interpolation
29 |     if mode == 'bilinear':
30 |         tf.compat.v1.image.resize_bilinear(input, size=new_size, align_corners=align_corners)
31 |     elif mode == 'nearest':
32 |         tf.compat.v1.image.resize_nearest_neighbor(input, size=new_size, align_corners=align_corners)
33 |     elif mode == 'bicubic':
34 |         tf.compat.v1.image.resize_bicubic(input, size=new_size, align_corners=align_corners)
35 |     else:
36 |         resize_result = tf.image.resize(input, size=new_size, method=mode, antialias=antialias)
37 |     
38 |     return resize_result
39 | 


--------------------------------------------------------------------------------
/Note/nn/lambda_callback.py:
--------------------------------------------------------------------------------
 1 | class LambdaCallback:
 2 |     def __init__(self,
 3 |                  on_train_begin=None,
 4 |                  on_train_end=None,
 5 |                  on_epoch_begin=None,
 6 |                  on_epoch_end=None,
 7 |                  on_episode_begin=None,
 8 |                  on_episode_end=None,
 9 |                  on_batch_begin=None,
10 |                  on_batch_end=None,
11 |                  on_test_begin=None,
12 |                  on_test_end=None):
13 |         self.on_train_begin = on_train_begin
14 |         self.on_train_end = on_train_end
15 |         self.on_epoch_begin = on_epoch_begin
16 |         self.on_epoch_end = on_epoch_end
17 |         self.on_episode_begin = on_episode_begin
18 |         self.on_episode_end = on_episode_end
19 |         self.on_batch_begin = on_batch_begin
20 |         self.on_batch_end = on_batch_end
21 |         self.on_test_begin = on_test_begin
22 |         self.on_test_end = on_test_end
23 | 


--------------------------------------------------------------------------------
/Note/nn/layer/BiRNN.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
class BiRNN:
    """Bidirectional, multi-layer RNN wrapper.

    Runs a stack of forward cells over the input and a stack of backward
    cells over the time-reversed input, then concatenates the outputs and
    final states of the last layer of each direction.
    """
    def __init__(self,fw_cells,bw_cells):
        # Receive a list of forward and backward RNNCell objects as parameters
        self.fw_cells=fw_cells
        self.bw_cells=bw_cells
        # NOTE(review): this reads ``output_size`` on the containers themselves,
        # while __call__ indexes them like lists - a plain list has no such
        # attribute; confirm the expected container type.
        self.output_size=self.fw_cells.output_size+self.bw_cells.output_size
    
    
    def __call__(self,data):
        """Return ``(output, state)`` for ``data``.

        ``data`` is reversed along axis 1 for the backward direction, so
        axis 0 is presumably batch and axis 1 time - TODO confirm.
        """
        # Get batch_size from data
        batch_size=tf.shape(data)[0]
        # Reverse the input data along the time dimension to get the backward input data
        data_bw=tf.reverse(data,axis=[1])
        # Define a scan function to calculate the output and state of each time step
        # NOTE(review): tf.scan calls fn(accumulator, element) with two arguments,
        # but this function takes three and the cell is passed inside ``elems``
        # below - verify that this runs as intended.
        def scan_fn(state,data,cell):
            output,state=cell.output(data,state)
            return output,state
        # Use tf.scan function to scan the forward and backward input data and get the forward and backward output data and state list
        outputs_fw=[]
        states_fw=[]
        outputs_bw=[]
        states_bw=[]
        # NOTE(review): every initializer below hard-codes a width of 32;
        # presumably this should follow the cell's hidden size - confirm.
        for i in range(len(self.fw_cells)):
            cell=self.fw_cells[i]
            if i==0: # The first layer uses the original input data
                output_fw,state_fw=tf.scan(scan_fn,(data,cell),initializer=(tf.zeros([batch_size,32]),tf.zeros([batch_size,32])),swap_memory=True)
            else: # The later layers use the output data of the previous layer
                output_fw,state_fw=tf.scan(scan_fn,(outputs_fw[-1],cell),initializer=(tf.zeros([batch_size,32]),tf.zeros([batch_size,32])),swap_memory=True)
            outputs_fw.append(output_fw)
            states_fw.append(state_fw)
            # The same index selects the backward cell, so both stacks are
            # expected to have the same number of layers.
            cell=self.bw_cells[i]
            if i==0: # The first layer uses the reversed input data
                output_bw,state_bw=tf.scan(scan_fn,(data_bw,cell),initializer=(tf.zeros([batch_size,32]),tf.zeros([batch_size,32])),swap_memory=True)
            else: # The later layers use the output data of the previous layer
                output_bw,state_bw=tf.scan(scan_fn,(outputs_bw[-1],cell),initializer=(tf.zeros([batch_size,32]),tf.zeros([batch_size,32])),swap_memory=True)
            outputs_bw.append(output_bw)
            states_bw.append(state_bw)
        # Concatenate the forward and backward outputs and states to get the bidirectional outputs and states tensor
        # NOTE(review): the backward outputs are not reversed back before the
        # concatenation - confirm that this ordering is intended.
        output=tf.concat([outputs_fw[-1],outputs_bw[-1]],axis=-1) # Shape is [batch_size, seq_length, hidden_size * 2]
        state_fw=states_fw[-1] # Take the forward state of the last layer
        state_bw=states_bw[-1] # Take the backward state of the last layer
        state=tf.concat([state_fw[-1],state_bw[-1]],axis=-1) # Concatenate the forward and backward states of the last time step
        return output,state


--------------------------------------------------------------------------------
/Note/nn/layer/BigBird_masks.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | class BigBird_masks:
 4 |   """Creates bigbird attention masks."""
 5 | 
 6 |   def __init__(self, block_size):
 7 |     self._block_size = block_size
 8 | 
 9 |   def __call__(self, data, mask):
10 |     encoder_shape = tf.shape(mask)
11 |     mask = tf.cast(mask, data.dtype)
12 |     batch_size, seq_length = encoder_shape[0], encoder_shape[1]
13 |     # reshape for blocking
14 |     blocked_encoder_mask = tf.reshape(
15 |         mask, (batch_size, seq_length // self._block_size, self._block_size))
16 |     encoder_from_mask = tf.reshape(mask, (batch_size, 1, seq_length, 1))
17 |     encoder_to_mask = tf.reshape(mask, (batch_size, 1, 1, seq_length))
18 | 
19 |     band_mask = create_band_mask_from_inputs(blocked_encoder_mask,
20 |                                              blocked_encoder_mask)
21 |     return [band_mask, encoder_from_mask, encoder_to_mask, blocked_encoder_mask]
22 | 
def create_band_mask_from_inputs(from_blocked_mask, to_blocked_mask):
  """Create the banded (sliding-window) attention mask from blocked masks.

  Every inner "from" block (the first two and last two blocks are dropped) is
  paired with its left neighbour, itself and its right neighbour on the "to"
  side.

  Args:
    from_blocked_mask: 3D Tensor of shape [batch_size,
      from_seq_length//from_block_size, from_block_size].
    to_blocked_mask: 3D Tensor of shape [batch_size,
      to_seq_length//to_block_size, to_block_size].

  Returns:
    Tensor of shape [batch_size, 1, from_seq_length//from_block_size-4,
                     from_block_size,  3*to_block_size].
  """
  # Three shifted windows over the block axis, all of the same length.
  left_blocks = to_blocked_mask[:, 1:-3]
  centre_blocks = to_blocked_mask[:, 2:-2]
  right_blocks = to_blocked_mask[:, 3:-1]
  exp_blocked_to_pad = tf.concat([left_blocks, centre_blocks, right_blocks], 2)

  # Outer product per block: a (from position, to position) pair is kept only
  # when both mask entries are non-zero.
  inner_from_blocks = from_blocked_mask[:, 2:-2]
  band_mask = tf.einsum("BLQ,BLK->BLQK", inner_from_blocks, exp_blocked_to_pad)

  # Insert a singleton axis at position 1 (presumably the heads axis).
  return tf.expand_dims(band_mask, 1)
44 | 


--------------------------------------------------------------------------------
/Note/nn/layer/ConvRNN.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | class ConvRNN:
 5 |     def __init__(self,conv_layer,rnn_layer):
 6 |         # Receive a convolution layer object and an RNN layer object as parameters
 7 |         self.conv_layer=conv_layer
 8 |         self.rnn_layer=rnn_layer
 9 |         self.output_size=rnn_layer.output_size
10 |     
11 |     
12 |     def __call__(self,data):
13 |         # Get the number of timesteps in the input data
14 |         timestep=data.shape[1]
15 |         # Create an empty list to store the convolution results for each timestep
16 |         conv_outputs=[]
17 |         # Perform convolution operations on the input data for each timestep and add the results to the list
18 |         for i in range(timestep):
19 |             conv_output=self.conv_layer(data[:,i])
20 |             conv_outputs.append(conv_output)
21 |         # Convert the list to a tensor with shape [batch_size, timestep, ...]
22 |         conv_outputs=tf.stack(conv_outputs,axis=1)
23 |         # Pass the convolution results to the RNN layer and get the final output
24 |         rnn_output=self.rnn_layer(conv_outputs)
25 |         return rnn_output


--------------------------------------------------------------------------------
/Note/nn/layer/GCN.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note.nn.layer.dense import dense
 3 | from Note.nn.layer.dropout import dropout
 4 | 
 5 | 
 6 | class GCNLayer:
 7 |     def __init__(self, in_features, out_features, bias=True):
 8 |         self.linear = dense(out_features, in_features, use_bias=bias)
 9 | 
10 |     def __call__(self, x, adj):
11 |         x = self.linear(x)
12 |         return tf.matmul(adj, x)
13 | 
14 | 
class GCN:
    """Stack of GCNLayer objects with ReLU and dropout between them.

    The stack has `nb_layers + 1` layers: x_dim -> h_dim, then h_dim -> h_dim
    (`nb_layers - 1` times), then h_dim -> out_dim.
    """

    def __init__(self, x_dim, h_dim, out_dim, nb_layers=2, dropout_rate=0.5, bias=True):
        widths = [x_dim] + [h_dim] * nb_layers + [out_dim]
        # Consecutive width pairs give the (input, output) size of each layer.
        self.gcn_layers = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            self.gcn_layers.append(GCNLayer(fan_in, fan_out, bias))
        self.dropout = dropout(dropout_rate)

    def __call__(self, x, adj):
        """Run `x` through every layer using the adjacency matrix `adj`.

        ReLU and dropout follow every layer except the last one, whose raw
        output is returned.
        """
        hidden_layers = self.gcn_layers[:-1]
        output_layer = self.gcn_layers[-1]
        for gcn_layer in hidden_layers:
            activated = tf.nn.relu(gcn_layer(x, adj))
            x = self.dropout(activated)
        return output_layer(x, adj)


--------------------------------------------------------------------------------
/Note/nn/layer/GRUCell.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf # import the TensorFlow library
 2 | from Note import nn
 3 | 
 4 | 
 5 | class GRUCell: # define a class for gated recurrent unit (GRU) cell
 6 |     def __init__(self,weight_shape,weight_initializer='Xavier',bias_initializer='zeros',use_bias=True,trainable=True,dtype='float32'): # define the constructor method
 7 |         self.weight=nn.initializer([weight_shape[0]+weight_shape[1],3*weight_shape[1]],weight_initializer,dtype,trainable) # initialize the weight matrix for all gates and candidate hidden state
 8 |         if use_bias==True: # if use bias is True
 9 |             self.bias=nn.initializer([3*weight_shape[1]],bias_initializer,dtype,trainable) # initialize the bias vector for all gates and candidate hidden state
10 |         self.use_bias=use_bias # set the use bias flag
11 |         self.output_size=weight_shape[-1]
12 |         if use_bias==True: # if use bias is True
13 |             self.param=[self.weight,self.bias] # store the parameters in a list
14 |         else: # if use bias is False
15 |             self.param=[self.weight] # store only the weight matrix in a list
16 |     
17 |     
18 |     def __call__(self,data,state): # define the output method
19 |         if data.dtype!=self.dtype:
20 |             data=tf.cast(data,self.dtype)
21 |         x=tf.concat([data,state],axis=-1) # concatenate the input data and state along the last dimension
22 |         if self.use_bias==True: # if use bias is True
23 |             z=tf.matmul(x,self.weight)+self.bias # calculate the linear transformation of concatenated data and weight matrix, plus bias vector
24 |         else: # if use bias is False
25 |             z=tf.matmul(x,self.weight) # calculate the linear transformation of concatenated data and weight matrix
26 |         r,z,h=tf.split(z,3,axis=-1) # split the linear transformation into three parts: reset gate, update gate and candidate hidden state
27 |         r=tf.nn.sigmoid(r) # apply activation function to the reset gate
28 |         z=tf.nn.sigmoid(z) # apply activation function to the update gate
29 |         h=tf.nn.tanh(h) # apply activation function to the candidate hidden state
30 |         h_new=z*state+(1-z)*h # calculate the new hidden state value by combining the update gate, previous state and candidate hidden state values
31 |         output=h_new # set the output value as the new hidden state value
32 |         return output,h_new # return the output value and the new hidden state value


--------------------------------------------------------------------------------
/Note/nn/layer/LSTMCell.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf # import the TensorFlow library
 2 | from Note import nn
 3 | 
 4 | 
 5 | class LSTMCell: # define a class for long short-term memory (LSTM) cell
 6 |     def __init__(self,weight_shape,weight_initializer='Xavier',bias_initializer='zeros',use_bias=True,trainable=True,dtype='float32'): # define the constructor method
 7 |         self.weight=nn.initializer([weight_shape[0]+weight_shape[1],4*weight_shape[1]],weight_initializer,dtype,trainable) # initialize the weight matrix for all gates and candidate cell state
 8 |         if use_bias==True: # if use bias is True
 9 |             self.bias=nn.initializer([4*weight_shape[1]],bias_initializer,dtype,trainable) # initialize the bias vector for all gates and candidate cell state
10 |         self.use_bias=use_bias # set the use bias flag
11 |         self.output_size=weight_shape[-1]
12 |         if use_bias==True: # if use bias is True
13 |             self.param=[self.weight,self.bias] # store the parameters in a list
14 |         else: # if use bias is False
15 |             self.param=[self.weight] # store only the weight matrix in a list
16 |     
17 |     
18 |     def __call__(self,data,state): # define the output method
19 |         if data.dtype!=self.dtype:
20 |             data=tf.cast(data,self.dtype)
21 |         x=tf.concat([data,state],axis=-1) # concatenate the input data and state along the last dimension
22 |         if self.use_bias==True: # if use bias is True
23 |             z=tf.matmul(x,self.weight)+self.bias # calculate the linear transformation of concatenated data and weight matrix, plus bias vector
24 |         else: # if use bias is False
25 |             z=tf.matmul(x,self.weight) # calculate the linear transformation of concatenated data and weight matrix
26 |         i,f,o,c=tf.split(z,4,axis=-1) # split the linear transformation into four parts: input gate, forget gate, output gate and candidate cell state
27 |         i=tf.nn.sigmoid(i) # apply activation function to the input gate
28 |         f=tf.nn.sigmoid(f) # apply activation function to the forget gate
29 |         o=tf.nn.sigmoid(o) # apply activation function to the output gate
30 |         c=tf.nn.tanh(c) # apply activation function to the candidate cell state
31 |         c_new=i*c+f*state # calculate the new cell state value by combining the input gate, candidate cell state and forget gate multiplied by previous state values
32 |         output=o*tf.nn.tanh(c_new) # calculate the output value by multiplying the output gate and the tanh activation of the new cell state value
33 |         return output,c_new # return the output value and the new cell state value


--------------------------------------------------------------------------------
/Note/nn/layer/LoRALinear.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note.nn.layer.dense import dense
 3 | 
 4 | class LoRALinear:
 5 |     @staticmethod
 6 |     def from_linear(linear, rank: int = 8):
 7 |         # TODO remove when input_dims and output_dims are attributes
 8 |         # on linear and quantized linear
 9 |         output_dims, input_dims = linear.weight.shape
10 |         lora_lin = LoRALinear(input_dims, output_dims, rank)
11 |         lora_lin.linear = linear
12 |         return lora_lin
13 | 
14 |     def to_linear(self):
15 |         linear = self.linear
16 |         bias = linear.use_bias
17 |         weight = linear.weight
18 | 
19 |         # Use the same type as the linear weight if not quantized
20 |         dtype = weight.dtype
21 | 
22 |         output_dims, input_dims = weight.shape
23 |         fused_linear = dense(output_dims, input_dims, bias=bias)
24 | 
25 |         lora_b = tf.cast((self.scale * tf.transpose(self.lora_b)), dtype)
26 |         lora_a = tf.cast(tf.transpose(self.lora_a), dtype)
27 |         fused_linear.weight = weight + tf.matmul(lora_b, lora_a)
28 |         if bias:
29 |             fused_linear.bias = linear.bias
30 | 
31 |         return fused_linear
32 | 
33 |     def __init__(
34 |         self,
35 |         input_dims: int,
36 |         output_dims: int,
37 |         lora_rank: int = 8,
38 |         bias: bool = False,
39 |         scale: float = 20.0,
40 |     ):
41 |         # Regular linear layer weights
42 |         self.linear = dense(output_dims, input_dims, bias=bias)
43 | 
44 |         # Scale for low-rank update
45 |         self.scale = scale
46 | 
47 |         # Low rank lora weights
48 |         scale = 1 / tf.math.sqrt(input_dims)
49 |         self.lora_a = tf.random.uniform(
50 |             low=-scale,
51 |             high=scale,
52 |             shape=(input_dims, lora_rank),
53 |         )
54 |         self.lora_b = tf.zeros(shape=(lora_rank, output_dims))
55 | 
56 |     def __call__(self, data):
57 |         dtype = self.linear.weight.dtype
58 |         y = self.linear(tf.cast(data, dtype))
59 |         z = tf.matmul(tf.matmul(data, self.lora_a), self.lora_b)
60 |         return y + self.scale * z


--------------------------------------------------------------------------------
/Note/nn/layer/PReLU.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note.nn.initializer import initializer
 3 | 
 4 | 
 5 | class PReLU:
 6 |     """Parametric Rectified Linear Unit.
 7 | 
 8 |     It follows:
 9 | 
10 |     ```
11 |         f(x) = alpha * x for x < 0
12 |         f(x) = x for x >= 0
13 |     ```
14 | 
15 |     where `alpha` is a learned array with the same shape as x.
16 | 
17 |     Input shape:
18 |         Arbitrary. Use the keyword argument `input_shape`
19 |         (tuple of integers, does not include the samples axis)
20 |         when using this layer as the first layer in a model.
21 | 
22 |     Output shape:
23 |         Same shape as the input.
24 | 
25 |     Args:
26 |         alpha_initializer: Initializer function for the weights.
27 |         shared_axes: The axes along which to share learnable
28 |             parameters for the activation function.
29 |             For example, if the incoming feature maps
30 |             are from a 2D convolution
31 |             with output shape `(batch, height, width, channels)`,
32 |             and you wish to share parameters across space
33 |             so that each filter only has one set of parameters,
34 |             set `shared_axes=[1, 2]`.
35 |     """
36 | 
37 |     def __init__(
38 |         self,
39 |         input_shape=None,
40 |         alpha_initializer="zeros",
41 |         shared_axes=None,
42 |         dtype='float32'
43 |     ):
44 |         self.alpha_initializer = alpha_initializer
45 |         if shared_axes is None:
46 |             self.shared_axes = None
47 |         elif not isinstance(shared_axes, (list, tuple)):
48 |             self.shared_axes = [shared_axes]
49 |         else:
50 |             self.shared_axes = list(shared_axes)
51 |         self.dtype=dtype
52 |         self.input_shape=input_shape
53 |         if input_shape is not None:
54 |             param_shape = list(input_shape[1:])
55 |             if self.shared_axes is not None:
56 |                 for i in self.shared_axes:
57 |                     param_shape[i - 1] = 1
58 |             self.alpha = initializer(
59 |                 shape=param_shape,
60 |                 initializer=alpha_initializer,
61 |                 dtype=dtype
62 |             )
63 |             self.param=[self.alpha]
64 | 
65 | 
66 |     def __call__(self, data):
67 |         if data.dtype!=self.dtype:
68 |             data=tf.cast(data,self.dtype)
69 |         if self.input_shape is None:
70 |             self.input_shape=data.shape
71 |             param_shape = list(self.input_shape[1:])
72 |             if self.shared_axes is not None:
73 |                 for i in self.shared_axes:
74 |                     param_shape[i - 1] = 1
75 |             self.alpha = initializer(
76 |                 shape=param_shape,
77 |                 initializer=self.alpha_initializer,
78 |                 dtype=self.dtype
79 |             )
80 |             self.param=[self.alpha]
81 |         pos = tf.nn.relu(data)
82 |         neg = -self.alpha * tf.nn.relu(-data)
83 |         return pos + neg


--------------------------------------------------------------------------------
/Note/nn/layer/RMSNorm.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note.nn.initializer import initializer
 3 | 
 4 | 
 5 | class RMSNorm:
 6 |     def __init__(self, dims: int, eps: float = 1e-6, dtype='float32'):
 7 |         self.gamma = initializer((dims,), 'ones', dtype)
 8 |         self.eps = eps
 9 |         self.param = [self.gamma]
10 | 
11 |     def __call__(self, x):
12 |         n = tf.math.rsqrt(tf.math.reduce_mean(tf.math.square(x), axis=-1, keepdims=True) + self.eps)
13 |         return self.gamma * x * n


--------------------------------------------------------------------------------
/Note/nn/layer/RNNCell.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf # import the TensorFlow library
 2 | from Note import nn
 3 | from Note.nn.activation import activation_dict # import the activation function dictionary from Note.nn package
 4 | 
 5 | 
 6 | class RNNCell: # define a class for recurrent neural network (RNN) cell
 7 |     def __init__(self,weight_shape,weight_initializer='Xavier',bias_initializer='zeros',activation=None,use_bias=True,trainable=True,dtype='float32'): # define the constructor method
 8 |         self.weight_i=nn.initializer(weight_shape,weight_initializer,dtype,trainable) # initialize the weight matrix for input data
 9 |         self.weight_s=nn.initializer([weight_shape[1],weight_shape[1]],weight_initializer,dtype,trainable) # initialize the weight matrix for previous state
10 |         if use_bias==True: # if use bias is True
11 |             self.bias=nn.initializer([weight_shape[1]],bias_initializer,dtype,trainable) # initialize the bias vector
12 |         self.activation=activation_dict[activation] # get the activation function from the activation dictionary
13 |         self.use_bias=use_bias # set the use bias flag
14 |         self.output_size=weight_shape[-1]
15 |         if use_bias==True: # if use bias is True
16 |             self.param=[self.weight_i,self.weight_s,self.bias] # store the parameters in a list
17 |         else: # if use bias is False
18 |             self.param=[self.weight_i,self.weight_s] # store only the weight matrices in a list
19 |     
20 |     
21 |     def __call__(self,data,state): # define the output method
22 |         if data.dtype!=self.dtype:
23 |             data=tf.cast(data,self.dtype)
24 |         output=tf.matmul(data,self.weight_i)+tf.matmul(state,self.weight_s) # calculate the linear transformation of input data and previous state
25 |         if self.use_bias==True: # if use bias is True
26 |             output=output+self.bias # add the bias vector to the linear transformation
27 |         if self.activation is not None: # if activation function is not None
28 |             output=self.activation(output) # apply activation function to the linear transformation
29 |         return output # return the output value


--------------------------------------------------------------------------------
/Note/nn/layer/RoPE.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | class RoPE:
 4 |     def __init__(self, dims: int, traditional: bool = False, base=None):
 5 |         self.dims = dims
 6 |         self.traditional = traditional
 7 |         self.base = base
 8 | 
 9 |     def _compute_rope(self, costheta, sintheta, x):
10 |         x1 = x[..., : self.dims // 2]
11 |         x2 = x[..., self.dims // 2 : self.dims]
12 |         rx1 = x1 * costheta - x2 * sintheta
13 |         rx2 = x1 * sintheta + x2 * costheta
14 | 
15 |         if self.dims < x.shape[-1]:
16 |             rx = tf.concat([rx1, rx2, x[..., self.dims :]], axis=-1)
17 |         else:
18 |             rx = tf.concat([rx1, rx2], axis=-1)
19 | 
20 |         return rx
21 | 
22 |     def _compute_traditional_rope(self, costheta, sintheta, x):
23 |         x1 = x[..., ::2]
24 |         x2 = x[..., 1::2]
25 |         rx1 = x1 * costheta - x2 * sintheta
26 |         rx2 = x1 * sintheta + x2 * costheta
27 | 
28 |         if self.dims < x.shape[-1]:
29 |             raise NotImplementedError(
30 |                 "RoPE doesn't implement partial traditional application"
31 |             )
32 | 
33 |         rx = tf.concat([rx1[..., None], rx2[..., None]], axis=-1)
34 | 
35 |         return rx
36 | 
37 |     def __call__(self, x, offset: int = 0):
38 |         shape = x.shape
39 |         x = tf.reshape(x, (-1, shape[-2], shape[-1]))
40 |         N = x.shape[1] + offset
41 |         costheta, sintheta = RoPE.create_cos_sin_theta(
42 |             N, self.dims, offset=offset, base=self.base, dtype=x.dtype
43 |         )
44 | 
45 |         rope = (
46 |             self._compute_traditional_rope if self.traditional else self._compute_rope
47 |         )
48 |         rx = rope(costheta, sintheta, x)
49 | 
50 |         return tf.reshape(rx, shape)
51 | 
52 |     @staticmethod
53 |     def create_cos_sin_theta(
54 |         N: int,
55 |         D: int,
56 |         offset: int = 0,
57 |         base: float = 10000,
58 |         dtype=tf.float32,
59 |     ):
60 |         D = D // 2
61 |         positions = tf.range(offset, N, dtype=dtype)
62 |         freqs = tf.math.exp(
63 |             -tf.range(0, D, dtype=dtype) * (tf.math.log(base) / D)
64 |         )
65 |         theta = tf.reshape(positions, (-1, 1)) * tf.reshape(freqs, (1, -1))
66 |         costheta = tf.math.cos(theta)
67 |         sintheta = tf.math.sin(theta)
68 | 
69 |         return costheta, sintheta


--------------------------------------------------------------------------------
/Note/nn/layer/SwitchGLU.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note import nn
 3 | import math
 4 | 
 5 | class SwitchLinear:
 6 |     def __init__(
 7 |         self, input_dims: int, output_dims: int, num_experts: int, bias: bool = True
 8 |     ):
 9 |         scale = math.sqrt(1 / input_dims)
10 |         self.weight = tf.Variable(tf.random.uniform(
11 |             minval=-scale,
12 |             maxval=scale,
13 |             shape=(num_experts, input_dims, output_dims),
14 |         ))
15 |         nn.Model.param.append(self.weight)
16 | 
17 |         self.use_bias=bias
18 |         if bias:
19 |             self.bias = tf.Variable(tf.zeros((num_experts, output_dims)))
20 |             nn.Model.param.append(self.bias)
21 | 
22 |     @property
23 |     def input_dims(self):
24 |         return self.weight.shape[1]
25 | 
26 |     @property
27 |     def output_dims(self):
28 |         return self.weight.shape[2]
29 | 
30 |     @property
31 |     def num_experts(self):
32 |         return self.weight.shape[0]
33 | 
34 |     def __call__(self, x, indices):
35 |         x = nn.gather_mm(x, self.weight, indices)
36 |         if self.use_bias:
37 |             x = x + tf.expand_dims(tf.gather(self.bias, indices), -2)
38 |         return x
39 | 
class SwitchGLU:
    """Gated feed-forward block built from three SwitchLinear projections.

    Computes `down_proj(activation(gate_proj(x)) * up_proj(x))`, with every
    projection receiving the same expert `indices`.
    """

    def __init__(
        self,
        input_dims: int,
        hidden_dims: int,
        num_experts: int,
        activation=tf.nn.silu,
        bias: bool = False,
    ):
        # gate and up map input -> hidden; down maps hidden -> input.
        self.gate_proj = SwitchLinear(input_dims, hidden_dims, num_experts, bias=bias)
        self.up_proj = SwitchLinear(input_dims, hidden_dims, num_experts, bias=bias)
        self.down_proj = SwitchLinear(hidden_dims, input_dims, num_experts, bias=bias)
        self.activation = activation

    def __call__(self, x, indices):
        """Apply the gated projection to `x` using the experts named by `indices`."""
        up_branch = self.up_proj(x, indices)
        gate_branch = self.gate_proj(x, indices)
        gated = self.activation(gate_branch) * up_branch
        return self.down_proj(gated, indices)


--------------------------------------------------------------------------------
/Note/nn/layer/TLU.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note.nn.initializer import initializer
 3 | 
 4 | 
 5 | class TLU:
 6 |     r"""Thresholded Linear Unit.
 7 | 
 8 |     An activation function which is similar to ReLU
 9 |     but with a learned threshold that benefits models using FRN(Filter Response
10 |     Normalization). Original paper: https://arxiv.org/pdf/1911.09737.
11 | 
12 |     Input shape:
13 |         Arbitrary. Use the keyword argument `input_shape`
14 |         (tuple of integers, does not include the samples axis)
15 |         when using this layer as the first layer in a model.
16 | 
17 |     Output shape:
18 |         Same shape as the input.
19 | 
20 |     Args:
21 |         affine: `bool`. Whether to make it TLU-Affine or not
22 |             which has the form $\max(x, \alpha*x + \tau)$`
23 |     """
24 | 
25 |     def __init__(
26 |         self,
27 |         input_shape=None,
28 |         affine: bool = False,
29 |         tau_initializer = "zeros",
30 |         alpha_initializer = "zeros",
31 |         dtype='float32'
32 |     ):
33 |         self.affine = affine
34 |         self.tau_initializer = tau_initializer
35 |         if self.affine:
36 |             self.alpha_initializer = alpha_initializer
37 |         self.dtype=dtype
38 |         self.input_shape=input_shape
39 |         if input_shape is not None:
40 |             param_shape = list(input_shape[1:])
41 |             self.tau = initializer(param_shape, self.tau_initializer, dtype)
42 |             if self.affine:
43 |                 self.alpha = initializer(param_shape, self.alpha_initializer, dtype)
44 | 
45 | 
46 |     def __call__(self, data):
47 |         if data.dtype!=self.dtype:
48 |             data=tf.cast(data,self.dtype)
49 |         if self.input_shape is None:
50 |             self.input_shape=data.shape
51 |             param_shape = list(self.input_shape[1:])
52 |             self.tau = initializer(param_shape, self.tau_initializer, self.dtype)
53 |             if self.affine:
54 |                 self.alpha = initializer(param_shape, self.alpha_initializer, self.dtype)
55 |         v = self.alpha * data if self.affine else 0
56 |         return tf.maximum(data, self.tau + v)


--------------------------------------------------------------------------------
/Note/nn/layer/Transformer.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note.nn.layer.TransformerEncoder import TransformerEncoder
 3 | from Note.nn.layer.TransformerDecoder import TransformerDecoder
 4 | from Note.nn.layer.TransformerEncoderLayer import TransformerEncoderLayer
 5 | from Note.nn.layer.TransformerDecoderLayer import TransformerDecoderLayer
 6 | from Note.nn.layer.layer_norm import layer_norm
 7 | 
 8 | 
 9 | class Transformer:
10 |     def __init__(self, d_model: int = 512, nhead: int = 8, num_encoder_layers: int = 6,
11 |                  num_decoder_layers: int = 6, dim_feedforward: int = 2048, dropout: float = 0.1,
12 |                  activation = tf.nn.relu,
13 |                  custom_encoder = None, custom_decoder = None,
14 |                  layer_norm_eps: float = 1e-5, norm_first: bool = False,
15 |                  bias: bool = True, dtype='float32'):
16 |         if custom_encoder is not None:
17 |             self.encoder = custom_encoder
18 |         else:
19 |             encoder_layers = [TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout,
20 |                                                     activation, layer_norm_eps, norm_first,
21 |                                                     bias) for _ in range(num_encoder_layers)]
22 |             encoder_norm = layer_norm(d_model, epsilon=layer_norm_eps, dtype=dtype)
23 |             self.encoder = TransformerEncoder(encoder_layers, num_encoder_layers, encoder_norm)
24 | 
25 |         if custom_decoder is not None:
26 |             self.decoder = custom_decoder
27 |         else:
28 |             decoder_layers = [TransformerDecoderLayer(d_model, nhead, dim_feedforward, dropout,
29 |                                                     activation, layer_norm_eps, norm_first,
30 |                                                     bias) for _ in range(num_decoder_layers)]
31 |             decoder_norm = layer_norm(d_model, epsilon=layer_norm_eps, dtype=dtype)
32 |             self.decoder = TransformerDecoder(decoder_layers, num_decoder_layers, decoder_norm)
33 | 
34 |         self.d_model = d_model
35 |         self.nhead = nhead
36 |         self.dtype=dtype
37 | 
38 | 
39 |     def __call__(self, src, tgt, src_mask = None, tgt_mask = None, memory_mask = None, train_flag=True):
40 |         if src.dtype!=self.dtype:
41 |             src=tf.cast(src,self.dtype)
42 |         if tgt.dtype!=self.dtype:
43 |             tgt=tf.cast(tgt,self.dtype)
44 |         if src_mask is not None and src_mask.dtype!=self.dtype:
45 |             src_mask=tf.cast(src_mask,self.dtype)
46 |         if tgt_mask is not None and tgt_mask.dtype!=self.dtype:
47 |             tgt_mask=tf.cast(tgt_mask,self.dtype) 
48 |         if memory_mask is not None and memory_mask.dtype!=self.dtype:
49 |             memory_mask=tf.cast(memory_mask,self.dtype)
50 |         memory = self.encoder(src, mask=src_mask, train_flag=train_flag)
51 |         output = self.decoder(tgt, memory, tgt_mask=tgt_mask, memory_mask=memory_mask, train_flag=train_flag)
52 |         return output


--------------------------------------------------------------------------------
/Note/nn/layer/TransformerDecoder.py:
--------------------------------------------------------------------------------
 1 | class TransformerDecoder:
 2 |     def __init__(self, decoder_layers, num_layers, norm=None):
 3 |         self.layers = decoder_layers
 4 |         self.num_layers = num_layers
 5 |         self.norm = norm
 6 | 
 7 | 
 8 |     def __call__(self, tgt, memory, tgt_mask = None,
 9 |                 memory_mask = None, train_flag=True):
10 |         output = tgt
11 | 
12 |         for mod in self.layers:
13 |             output = mod.output(output, memory, tgt_mask=tgt_mask,
14 |                          memory_mask=memory_mask, train_flag=train_flag
15 |                          )
16 | 
17 |         if self.norm is not None:
18 |             output = self.norm.output(output)
19 | 
20 |         return output


--------------------------------------------------------------------------------
/Note/nn/layer/TransformerEncoder.py:
--------------------------------------------------------------------------------
 1 | class TransformerEncoder:
 2 |     def __init__(self, encoder_layers, num_layers, norm=None):
 3 |         self.layers = encoder_layers
 4 |         self.num_layers = num_layers
 5 |         self.norm = norm
 6 | 
 7 | 
 8 |     def __call__(
 9 |             self,
10 |             src,
11 |             mask = None,
12 |             train_flag=True
13 |             ):
14 |         output = src
15 | 
16 |         for mod in self.layers:
17 |             output = mod.output(output, src_mask=mask, train_flag=train_flag)
18 | 
19 |         if self.norm is not None:
20 |             output = self.norm.output(output)
21 | 
22 |         return output


--------------------------------------------------------------------------------
/Note/nn/layer/TransformerEncoderLayer.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note.nn.layer.multihead_attention import multihead_attention
 3 | from Note.nn.layer.dense import dense
 4 | from Note.nn.layer.layer_norm import layer_norm
 5 | from Note.nn.layer.dropout import dropout
 6 | from Note.nn.activation import activation_dict
 7 | 
 8 | 
 9 | class TransformerEncoderLayer:
10 |     def __init__(self, d_model: int, nhead: int, dim_feedforward: int = 2048, dropout_rate: float = 0.1,
11 |                  activation = tf.nn.relu,
12 |                  layer_norm_eps: float = 1e-5, norm_first: bool = False,
13 |                  bias: bool = True, dtype='float32'):
14 |         self.self_attn = multihead_attention(nhead, input_size=d_model, use_bias=bias, dtype=dtype)
15 |         # Implementation of Feedforward model
16 |         self.linear1 = dense(dim_feedforward, d_model, use_bias=bias, dtype=dtype)
17 |         self.dropout = dropout(dropout_rate)
18 |         self.linear2 = dense(d_model, dim_feedforward, use_bias=bias, dtype=dtype)
19 | 
20 |         self.norm_first = norm_first
21 |         self.norm1 = layer_norm(d_model, epsilon=layer_norm_eps, dtype=dtype)
22 |         self.norm2 = layer_norm(d_model, epsilon=layer_norm_eps, dtype=dtype)
23 |         self.dropout1 = dropout(dropout_rate)
24 |         self.dropout2 = dropout(dropout_rate)
25 | 
26 |         if isinstance(activation, str):
27 |             activation = activation_dict[activation]
28 |         else:
29 |             self.activation = activation
30 | 
31 | 
32 |     def __call__(
33 |             self,
34 |             src,
35 |             src_mask=None,
36 |             train_flag=True
37 |             ):
38 | 
39 |         x = src
40 |         if self.norm_first:
41 |             x = x + self._sa_block(self.norm1(x), src_mask, train_flag)
42 |             x = x + self._ff_block(self.norm2(x), train_flag)
43 |         else:
44 |             x = self.norm1(x + self._sa_block(x, src_mask, train_flag))
45 |             x = self.norm2(x + self._ff_block(x, train_flag))
46 | 
47 |         return x
48 | 
49 | 
50 |     # self-attention block
51 |     def _sa_block(self, x,
52 |                   attn_mask=None, train_flag=True):
53 |         x = self.self_attn(x,
54 |                            mask=attn_mask,
55 |                            )[0]
56 |         if train_flag:
57 |             return self.dropout1(x)
58 |         else:
59 |             return x
60 | 
61 | 
62 |     # feed forward block
63 |     def _ff_block(self, x, train_flag):
64 |         if train_flag:
65 |             x = self.linear2(self.dropout(self.activation(self.linear1(x))))
66 |             return self.dropout2(x)
67 |         else:
68 |             return self.linear2(self.activation(self.linear1(x)))


--------------------------------------------------------------------------------
/Note/nn/layer/add.py:
--------------------------------------------------------------------------------
 1 | class add:
 2 |     def __init__(self):
 3 |         self.save_data_count=None
 4 |         
 5 |         
 6 |     def __call__(self,data):
 7 |         if self.save_data_count!=None:
 8 |             output=data.pop(0)
 9 |             for i in range(1,self.save_data_count):
10 |                 output+=data.pop(0)
11 |         else:
12 |             output=data[0]
13 |             for i in range(1,len(data)):
14 |                 output+=data[i]
15 |         return output


--------------------------------------------------------------------------------
/Note/nn/layer/additive_attention.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note.nn.initializer import initializer
 3 | 
 4 | 
 5 | class additive_attention:
 6 |     def __init__(self,input_size=None, use_scale=True, dtype='float32'):
 7 |         self.use_scale = use_scale
 8 |         self.dtype=dtype
 9 |         if input_size!=None and use_scale:
10 |             self.scale = initializer([input_size], 'Xavier', dtype)
11 |             self.param=[self.scale]
12 |     
13 |     def build(self):
14 |         self.output_size=self.input_size
15 |         if self.input_size!=None and self.use_scale:
16 |             self.scale = initializer([self.input_size], 'Xavier', self.dtype)
17 |             self.param=[self.scale]
18 |         return
19 | 
20 |     def __call__(self, query, key):
21 |         """Calculates attention scores as a nonlinear sum of query and key.
22 | 
23 |         Args:
24 |             query: Query tensor of shape `[batch_size, Tq, dim]`.
25 |             key: Key tensor of shape `[batch_size, Tv, dim]`.
26 |         Returns:
27 |             Tensor of shape `[batch_size, Tq, Tv]`.
28 |         """
29 |         if query.dtype!=self.dtype:
30 |             query=tf.cast(query,self.dtype)
31 |         if key.dtype!=self.dtype:
32 |             key=tf.cast(key,self.dtype)
33 |         if self.input_size==None:
34 |             self.input_size=query.shape[-1]
35 |             self.build()
36 |         # Reshape tensors to enable broadcasting.
37 |         # Reshape into [batch_size, Tq, 1, dim].
38 |         q_reshaped = tf.expand_dims(query, axis=-2)
39 |         # Reshape into [batch_size, 1, Tv, dim].
40 |         k_reshaped = tf.expand_dims(key, axis=-3)
41 |         if self.use_scale:
42 |             scale = self.scale
43 |         else:
44 |             scale = 1.0
45 |         return tf.reduce_sum(scale * tf.tanh(q_reshaped + k_reshaped), axis=-1)


--------------------------------------------------------------------------------
/Note/nn/layer/alpha_dropout.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | class alpha_dropout:
 5 |     """Applies Alpha Dropout to the input.
 6 | 
 7 |     Alpha Dropout is a `Dropout` that keeps mean and variance of inputs
 8 |     to their original values, in order to ensure the self-normalizing property
 9 |     even after this dropout.
10 |     Alpha Dropout fits well to Scaled Exponential Linear Units
11 |     by randomly setting activations to the negative saturation value.
12 | 
13 |     Args:
14 |       rate: float, drop probability (as with `Dropout`).
15 |         The multiplicative noise will have
16 |         standard deviation `sqrt(rate / (1 - rate))`.
17 |       seed: Integer, optional random seed to enable deterministic behavior.
18 | 
19 |     Call arguments:
20 |       inputs: Input tensor (of any rank).
21 |       training: Python boolean indicating whether the layer should behave in
22 |         training mode (adding dropout) or in inference mode (doing nothing).
23 | 
24 |     Input shape:
25 |       Arbitrary. Use the keyword argument `input_shape`
26 |       (tuple of integers, does not include the samples axis)
27 |       when using this layer as the first layer in a model.
28 | 
29 |     Output shape:
30 |       Same shape as input.
31 |     """
32 | 
33 |     def __init__(self, rate, noise_shape=None, seed=7):
34 |         self.rate = rate
35 |         self.noise_shape = noise_shape
36 |         self.seed = seed
37 |         self.supports_masking = True
38 | 
39 |     def _get_noise_shape(self, inputs):
40 |         return self.noise_shape if self.noise_shape else tf.shape(inputs)
41 | 
42 |     def __call__(self, inputs, train_flag=None):
43 |         if 0.0 < self.rate < 1.0:
44 |             noise_shape = self._get_noise_shape(inputs)
45 | 
46 |             def dropped_inputs(inputs=inputs, rate=self.rate):
47 |                 alpha = 1.6732632423543772848170429916717
48 |                 scale = 1.0507009873554804934193349852946
49 |                 alpha_p = -alpha * scale
50 | 
51 |                 kept_idx = tf.math.greater_equal(tf.random.uniform(noise_shape), rate)
52 |                 kept_idx = tf.cast(kept_idx, inputs.dtype)
53 | 
54 |                 # Get affine transformation params
55 |                 a = ((1 - rate) * (1 + rate * alpha_p**2)) ** -0.5
56 |                 b = -a * alpha_p * rate
57 | 
58 |                 # Apply mask
59 |                 x = inputs * kept_idx + alpha_p * (1 - kept_idx)
60 | 
61 |                 # Do affine transformation
62 |                 return a * x + b
63 | 
64 |             return tf.cond(train_flag, lambda: dropped_inputs, lambda: inputs)
65 |         return inputs


--------------------------------------------------------------------------------
/Note/nn/layer/attention.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note.nn.initializer import initializer
 3 | 
 4 | 
 5 | class attention: # define a class for attention mechanism
 6 |     def __init__(self, use_scale=False, score_mode="dot", dtype='float32'):
 7 |         self.use_scale = use_scale
 8 |         self.score_mode = score_mode
 9 |         self.dtype=dtype
10 |         self.param=[]
11 |         if use_scale:
12 |             self.scale = initializer((),'ones',dtype)
13 |             self.param.append(self.scale)
14 |         if score_mode == "concat":
15 |             self.concat_score_weight = initializer((),'ones',dtype)
16 |             self.param.append(self.concat_score_weight)
17 |     
18 |     
19 |     def __call__(self, query, value, key=None): # define the output method
20 |         if query.dtype!=self.dtype:
21 |             query=tf.cast(query,self.dtype)
22 |         if value.dtype!=self.dtype:
23 |             value=tf.cast(value,self.dtype)
24 |         if key is not None and key.dtype!=self.dtype:
25 |             key=tf.cast(key,self.dtype)
26 |         if self.score_mode == "dot":
27 |             if key==None:
28 |                 scores = tf.matmul(query, value, transpose_b=True)
29 |             else:
30 |                 scores = tf.matmul(query, key, transpose_b=True)
31 |             if self.scale is not None:
32 |                 scores *= self.scale
33 |         elif self.score_mode == "concat":
34 |             # Reshape tensors to enable broadcasting.
35 |             # Reshape into [batch_size, Tq, 1, dim].
36 |             q_reshaped = tf.expand_dims(query, axis=-2)
37 |             # Reshape into [batch_size, 1, Tv, dim].
38 |             if key==None:
39 |                 k_reshaped = tf.expand_dims(value, axis=-3)
40 |             else:
41 |                 k_reshaped = tf.expand_dims(key, axis=-3)
42 |             if self.scale is not None:
43 |                 scores = self.concat_score_weight * tf.reduce_sum(
44 |                     tf.tanh(self.scale * (q_reshaped + k_reshaped)), axis=-1
45 |                 )
46 |             else:
47 |                 scores = self.concat_score_weight * tf.reduce_sum(
48 |                     tf.tanh(q_reshaped + k_reshaped), axis=-1
49 |                 )
50 |         distribution = tf.nn.softmax(scores)
51 |         return tf.matmul(distribution, value)


--------------------------------------------------------------------------------
/Note/nn/layer/average.py:
--------------------------------------------------------------------------------
 1 | class average:
 2 |     def __init__(self):
 3 |         self.save_data_count=None
 4 |         
 5 |         
 6 |     def __call__(self,data):
 7 |         if self.save_data_count!=None:
 8 |             output=data.pop(0)
 9 |             for i in range(1,self.save_data_count):
10 |                 output+=data.pop(0)
11 |             return output/self.save_data_count
12 |         else:
13 |             output=data[0]
14 |             for i in range(1,len(data)):
15 |                 output+=data[i]
16 |             return output/len(data)


--------------------------------------------------------------------------------
/Note/nn/layer/avg_pool1d.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note import nn
 3 | 
 4 | 
 5 | class avg_pool1d:
 6 |     def __init__(self, kernel_size, strides=None, padding=0, count_include_pad=True):
 7 |         """
 8 |         Args:
 9 |             kernel_size: int, the size of the pooling window.
10 |             strides: int, stride of the pooling operation.
11 |             padding: int, str, or tuple, the padding applied to the input.
12 |             count_include_pad: bool, whether to include zero padding in the average calculation.
13 |         """
14 |         self.kernel_size = kernel_size
15 |         self.strides = strides if strides is not None else kernel_size
16 |         self.padding = padding
17 |         self.count_include_pad = count_include_pad
18 | 
19 |         if not isinstance(padding, str):
20 |             self.zeropadding1d = nn.zeropadding1d(padding=padding)
21 | 
22 |     def __call__(self, data):
23 |         if not isinstance(self.padding, str):
24 |             padded_data = self.zeropadding1d(data)
25 |             padding = 'VALID'
26 |         else:
27 |             padded_data = data
28 |             padding = self.padding
29 | 
30 |         # Apply avg_pool1d
31 |         pooled = tf.nn.avg_pool1d(
32 |             padded_data, ksize=self.kernel_size, strides=self.strides, padding=padding
33 |         )
34 | 
35 |         if not self.count_include_pad and not isinstance(self.padding, str):
36 |             # Calculate the effective kernel size for each window
37 |             k = self.kernel_size if isinstance(self.kernel_size, int) else self.kernel_size
38 | 
39 |             # Compute the mask of valid elements (non-zero-padded)
40 |             valid_mask = tf.ones_like(data, dtype=data.dtype)
41 |             valid_mask = self.zeropadding1d(valid_mask)
42 | 
43 |             # Apply the same pooling operation to the mask
44 |             valid_counts = tf.nn.avg_pool1d(
45 |                 valid_mask, ksize=self.kernel_size, strides=self.strides, padding='VALID'
46 |             ) * k
47 | 
48 |             # Avoid division by zero
49 |             valid_counts = tf.maximum(valid_counts, 1.0)
50 | 
51 |             # Adjust the pooled output to exclude zero-padded elements
52 |             pooled = pooled * k / valid_counts
53 | 
54 |         return pooled
55 | 


--------------------------------------------------------------------------------
/Note/nn/layer/avg_pool2d.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note import nn
 3 | 
 4 | 
 5 | class avg_pool2d:
 6 |     def __init__(self, kernel_size, strides=None, padding=0, count_include_pad=True):
 7 |         """
 8 |         Args:
 9 |             kernel_size: int or tuple, the size of the pooling window.
10 |             strides: int or tuple, stride of the pooling operation.
11 |             padding: int, str, or tuple, the padding applied to the input.
12 |             count_include_pad: bool, whether to include zero padding in the average calculation.
13 |         """
14 |         self.kernel_size = kernel_size
15 |         self.strides = strides if strides is not None else kernel_size
16 |         self.padding = padding
17 |         self.count_include_pad = count_include_pad
18 | 
19 |         if not isinstance(padding, str):
20 |             self.zeropadding2d = nn.zeropadding2d(padding=padding)
21 | 
22 |     def __call__(self, data):
23 |         if not isinstance(self.padding, str):
24 |             padded_data = self.zeropadding2d(data)
25 |             padding = 'VALID'
26 |         else:
27 |             padded_data = data
28 |             padding = self.padding
29 | 
30 |         # Apply avg_pool2d
31 |         pooled = tf.nn.avg_pool2d(
32 |             padded_data, ksize=self.kernel_size, strides=self.strides, padding=padding
33 |         )
34 | 
35 |         if not self.count_include_pad and not isinstance(self.padding, str):
36 |             # Calculate the effective kernel sizes for each window
37 |             k_h, k_w = self.kernel_size if isinstance(self.kernel_size, (tuple, list)) else (self.kernel_size, self.kernel_size)
38 | 
39 |             # Compute the mask of valid elements (non-zero-padded)
40 |             valid_mask = tf.ones_like(data, dtype=data.dtype)
41 |             valid_mask = self.zeropadding2d(valid_mask)
42 | 
43 |             # Apply the same pooling operation to the mask
44 |             valid_counts = tf.nn.avg_pool2d(
45 |                 valid_mask, ksize=self.kernel_size, strides=self.strides, padding='VALID'
46 |             ) * (k_h * k_w)
47 | 
48 |             # Avoid division by zero
49 |             valid_counts = tf.maximum(valid_counts, 1.0)
50 | 
51 |             # Adjust the pooled output to exclude zero-padded elements
52 |             pooled = pooled * (k_h * k_w) / valid_counts
53 | 
54 |         return pooled
55 | 


--------------------------------------------------------------------------------
/Note/nn/layer/avg_pool3d.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note import nn
 3 | 
 4 | 
 5 | class avg_pool3d:
 6 |     def __init__(self, kernel_size, strides=None, padding=0, count_include_pad=True):
 7 |         """
 8 |         Args:
 9 |             kernel_size: int or tuple, the size of the pooling window.
10 |             strides: int or tuple, stride of the pooling operation.
11 |             padding: int, str, or tuple, the padding applied to the input.
12 |             count_include_pad: bool, whether to include zero padding in the average calculation.
13 |         """
14 |         self.kernel_size = kernel_size
15 |         self.strides = strides if strides is not None else kernel_size
16 |         self.padding = padding
17 |         self.count_include_pad = count_include_pad
18 | 
19 |         if not isinstance(padding, str):
20 |             self.zeropadding3d = nn.zeropadding3d(padding=padding)
21 | 
22 |     def __call__(self, data):
23 |         if not isinstance(self.padding, str):
24 |             padded_data = self.zeropadding3d(data)
25 |             padding = 'VALID'
26 |         else:
27 |             padded_data = data
28 |             padding = self.padding
29 | 
30 |         # Apply avg_pool3d
31 |         pooled = tf.nn.avg_pool3d(
32 |             padded_data, ksize=self.kernel_size, strides=self.strides, padding=padding
33 |         )
34 | 
35 |         if not self.count_include_pad and not isinstance(self.padding, str):
36 |             # Calculate the effective kernel sizes for each window
37 |             k_d, k_h, k_w = (
38 |                 self.kernel_size
39 |                 if isinstance(self.kernel_size, (tuple, list))
40 |                 else (self.kernel_size, self.kernel_size, self.kernel_size)
41 |             )
42 | 
43 |             # Compute the mask of valid elements (non-zero-padded)
44 |             valid_mask = tf.ones_like(data, dtype=tf.float32)
45 |             valid_mask = self.zeropadding3d(valid_mask)
46 | 
47 |             # Apply the same pooling operation to the mask
48 |             valid_counts = tf.nn.avg_pool3d(
49 |                 valid_mask, ksize=self.kernel_size, strides=self.strides, padding='VALID'
50 |             ) * (k_d * k_h * k_w)
51 | 
52 |             # Avoid division by zero
53 |             valid_counts = tf.maximum(valid_counts, 1.0)
54 | 
55 |             # Adjust the pooled output to exclude zero-padded elements
56 |             pooled = pooled * (k_d * k_h * k_w) / valid_counts
57 | 
58 |         return pooled
59 | 


--------------------------------------------------------------------------------
/Note/nn/layer/axial_positional_encoding.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note import nn
 3 | 
 4 | 
 5 | class axial_positional_encoding:
 6 |   """A class for generating axial positional encoding for Reformer models."""
 7 | 
 8 |   def __init__(self, d_model, axial_shape, initializer='Xavier', trainable=True, dtype='float32'):
 9 |     """Initializes the axial positional encoding.
10 | 
11 |     Args:
12 |       d_model: int, the dimension of the model embeddings.
13 |       axial_shape: tuple of int, the shape of the input sequence, such as (batch_size, seq_length).
14 |     """
15 |     self.d_model = d_model
16 |     self.axial_shape = axial_shape
17 |     self.num_axial_pos_embs = len(axial_shape)
18 |     self.d_axial_pos_embs = d_model // self.num_axial_pos_embs
19 | 
20 |     # Create the learnable parameters for each axial dimension
21 |     self.weights = []
22 |     
23 |     self.output_size = d_model
24 |     
25 |     self.dtype=dtype
26 |     
27 |     # Create a list to store the parameters
28 |     self.param = []
29 |     
30 |     if trainable==True:
31 |         for i, dim in enumerate(axial_shape):
32 |           weight = nn.initializer((dim, self.d_axial_pos_embs), initializer, dtype)
33 |           self.weights.append(weight)
34 |           self.param.append(weight)
35 |     
36 | 
37 |   def __call__(self, data):
38 |     """Generates the axial positional encoding for the input tensor.
39 | 
40 |     Args:
41 |       data: tf.Tensor of shape [batch_size, seq_length, d_model], the input tensor.
42 | 
43 |     Returns:
44 |       tf.Tensor of shape [batch_size, seq_length, d_model], the output tensor with axial positional encoding added.
45 |     """
46 |     if data.dtype!=self.dtype:
47 |         data=tf.cast(data,self.dtype)
48 |         
49 |     # Reshape the input tensor to match the axial shape
50 |     data = tf.reshape(data, (-1,) + self.axial_shape + (self.d_model,))
51 |     
52 |     # Concatenate the positional embeddings along the last dimension
53 |     pos_emb = tf.concat(
54 |         [tf.expand_dims(weight, axis=0) for weight in self.weights],
55 |         axis=-1
56 |     )
57 | 
58 |     # Broadcast the positional embeddings to the input shape
59 |     pos_emb = tf.broadcast_to(pos_emb, data.shape)
60 | 
61 |     # Add the positional embeddings to the input tensor
62 |     data = data + pos_emb
63 | 
64 |     # Reshape the output tensor to the original shape
65 |     output = tf.reshape(data, (-1, self.axial_shape[0] * self.axial_shape[1], self.d_model))
66 | 
67 |     return output


--------------------------------------------------------------------------------
/Note/nn/layer/bilinear.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note.nn.initializer import initializer
 3 | from typing import Tuple
 4 | 
 5 | class bilinear:
 6 |   def __init__(self, embedding_dim: int, output_dim: int, dtype='float32'):
 7 |     """Initializer.
 8 | 
 9 |     Args:
10 |       embedding_dim: An integer that indicates the embedding dimension of the
11 |         interacting vectors.
12 |       output_dim: An integer that indicates the output dimension of the layer.
13 |     """
14 |     self._embedding_dim = embedding_dim
15 |     self._output_dim = output_dim
16 |     self.dtype = dtype
17 |     self._bilinear_weight = initializer(
18 |         shape=(self._embedding_dim, self._embedding_dim, self._output_dim),
19 |         initializer=['normal', 0.0, 1. /self._embedding_dim],
20 |         dtype=dtype)
21 |     self._linear_weight_1 = initializer(
22 |         shape=(self._embedding_dim, self._output_dim),
23 |         initializer=['normal', 0.0, 1. / tf.math.sqrt(self._embedding_dim)],
24 |         dtype=dtype)
25 |     self._linear_weight_2 = initializer(
26 |         shape=(self._embedding_dim, self._output_dim),
27 |         initializer=['normal', 0.0, 1. / tf.math.sqrt(self._embedding_dim)],
28 |         dtype=dtype)
29 |     self._bias = initializer(
30 |         shape=(self._output_dim),
31 |         initializer='zeros',
32 |         dtype=dtype)
33 | 
34 |   def __call__(self, data: Tuple[tf.Tensor]) -> tf.Tensor:
35 |     """Computes bilinear interaction between two vector tensors.
36 | 
37 |     Args:
38 |       data: A pair of tensors of the same shape [batch_size, embedding_dim].
39 | 
40 |     Returns:
41 |       A tensor, of shape [batch_size, output_dim], computed by the bilinear
42 |       interaction.
43 |     """
44 |     # Input of the function must be a list of two tensors.
45 |     vec_1, vec_2 = data
46 |     if vec_1.dtype!=self.dtype:
47 |         vec_1=tf.cast(vec_1, self.dtype)
48 |     if vec_2.dtype!=self.dtype:
49 |         vec_2=tf.cast(vec_2, self.dtype)
50 |     return tf.einsum(
51 |         'bi,ijk,bj->bk', vec_1, self._bilinear_weight, vec_2) + tf.einsum(
52 |             'bi,ik->bk', vec_1, self._linear_weight_1) + tf.einsum(
53 |                 'bi,ik->bk', vec_2, self._linear_weight_2) + self._bias


--------------------------------------------------------------------------------
/Note/nn/layer/concat.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | class concat:
 5 |     def __init__(self,axis=-1):
 6 |         self.axis=axis
 7 |         
 8 |         
 9 |     def __call__(self,data):
10 |         output=data.pop(0)
11 |         for i in range(1,len(data)):
12 |             output=tf.concat([output,data.pop(0)],axis=self.axis)
13 |         return output
14 | 


--------------------------------------------------------------------------------
/Note/nn/layer/cropping1d.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | class cropping1d:
 5 |     def __init__(self, cropping=1):
 6 |         if isinstance(cropping, int):
 7 |             self.cropping = tf.constant([[0, 0], [cropping, cropping], [0, 0]])
 8 |         elif isinstance(cropping, list) and len(cropping) == 2:
 9 |             self.cropping = tf.constant([[0, 0], [cropping[0], cropping[1]], [0, 0]])
10 |         else:
11 |             raise ValueError("Invalid cropping argument. It should be an int or a list of two ints.")
12 |     
13 |     
14 |     def __call__(self, data):
15 |         shape = tf.shape(data)
16 |         size = shape[1] - self.cropping[1][0] - self.cropping[1][1]
17 |         return tf.slice(data, begin=[0, self.cropping[1][0], 0], size=[-1, size, -1])


--------------------------------------------------------------------------------
/Note/nn/layer/cropping2d.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | class cropping2d:
 5 |     def __init__(self, cropping=1):
 6 |         if isinstance(cropping, int):
 7 |             self.cropping = tf.constant([[0, 0], [cropping, cropping], [cropping, cropping], [0, 0]])
 8 |         elif isinstance(cropping, list) and len(cropping) == 2:
 9 |             self.cropping = tf.constant([[0, 0], [cropping[0], cropping[0]], [cropping[1], cropping[1]], [0, 0]])
10 |         elif isinstance(cropping, list) and len(cropping) == 4:
11 |             self.cropping = tf.constant([[0, 0], [cropping[0], cropping[1]], [cropping[2], cropping[3]], [0, 0]])
12 |         else:
13 |             raise ValueError("Invalid cropping argument. It should be an int or a list of two or four ints.")
14 |     
15 |     
16 |     def __call__(self, data):
17 |         return tf.slice(data, begin=[0, self.cropping[1][0], self.cropping[2][0], 0], size=[-1, -1 - self.cropping[1][0] - self.cropping[1][1], -1 - self.cropping[2][0] - self.cropping[2][1], -1])


--------------------------------------------------------------------------------
/Note/nn/layer/cropping3d.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | class cropping3d:
 5 |     def __init__(self, cropping=1):
 6 |         if isinstance(cropping, int):
 7 |             self.cropping = tf.constant([[0, 0], [cropping, cropping], [cropping, cropping], [cropping, cropping], [0, 0]])
 8 |         elif isinstance(cropping, list) and len(cropping) == 3:
 9 |             self.cropping = tf.constant([[0, 0], [cropping[0], cropping[0]], [cropping[1], cropping[1]], [cropping[2], cropping[2]], [0, 0]])
10 |         elif isinstance(cropping, list) and len(cropping) == 6:
11 |             self.cropping = tf.constant([[0, 0], [cropping[0], cropping[1]], [cropping[2], cropping[3]], [cropping[4], cropping[5]], [0, 0]])
12 |         else:
13 |             raise ValueError("Invalid cropping argument. It should be an int or a list of three or six ints.")
14 |     
15 |     
16 |     def __call__(self, data):
17 |         shape = tf.shape(data)
18 |         size_1 = shape[1] - self.cropping[1][0] - self.cropping[1][1]
19 |         size_2 = shape[2] - self.cropping[2][0] - self.cropping[2][1]
20 |         size_3 = shape[3] - self.cropping[3][0] - self.cropping[3][1]
21 |         return tf.slice(data, begin=[0, self.cropping[1][0], self.cropping[2][0], self.cropping[3][0], 0], size=[-1, size_1, size_2, size_3, -1])


--------------------------------------------------------------------------------
/Note/nn/layer/dropout.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note import nn
 3 | 
 4 | 
 5 | class dropout:
 6 |     def __init__(self,rate,noise_shape=None,seed=None):
 7 |         self.rate=rate
 8 |         self.noise_shape=noise_shape
 9 |         self.seed=seed
10 |         self.train_flag=True
11 |         nn.Model.layer_list.append(self)
12 |         if nn.Model.name!=None and nn.Model.name not in nn.Model.layer_eval:
13 |             nn.Model.layer_eval[nn.Model.name]=[]
14 |             nn.Model.layer_eval[nn.Model.name].append(self)
15 |         elif nn.Model.name!=None:
16 |             nn.Model.layer_eval[nn.Model.name].append(self)
17 |         
18 |     
19 |     def __call__(self,data,training=None):
20 |         if training==None:
21 |             training=self.train_flag
22 |         if training==True:
23 |             output=tf.nn.dropout(data,self.rate,noise_shape=self.noise_shape,seed=self.seed)
24 |         else:
25 |             output=data
26 |         return output
27 | 


--------------------------------------------------------------------------------
/Note/nn/layer/dynamic_tanh.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note import nn
 3 | 
 4 | 
 5 | class DynamicTanh:
 6 |     def __init__(self, normalized_shape, alpha_init_value=0.5):
 7 |         self.normalized_shape = normalized_shape
 8 |         self.alpha_init_value = alpha_init_value
 9 | 
10 |         self.alpha = nn.Parameter(tf.ones(1) * alpha_init_value)
11 |         self.weight = nn.Parameter(tf.ones(normalized_shape))
12 |         self.bias = nn.Parameter(tf.zeros(normalized_shape))
13 | 
14 |     def __call__(self, x):
15 |         x = tf.nn.tanh(self.alpha * x)
16 |         x = x * self.weight + self.bias
17 |         return x
18 | 


--------------------------------------------------------------------------------
/Note/nn/layer/flatten.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | class flatten:
 5 |     def __init__(self):
 6 |         self.output_size=None
 7 |     
 8 |     
 9 |     def __call__(self,data):
10 |         data_shape=tf.shape(data)
11 |         batch_size=data_shape[0]
12 |         num_elements=tf.reduce_prod(data_shape[1:])
13 |         output=tf.reshape(data,[batch_size,num_elements])
14 |         return output


--------------------------------------------------------------------------------
/Note/nn/layer/format.py:
--------------------------------------------------------------------------------
 1 | from enum import Enum
 2 | from typing import Union
 3 | 
 4 | import tensorflow as tf
 5 | 
 6 | 
 7 | class Format(str, Enum):
 8 |     NCHW = 'NCHW'
 9 |     NHWC = 'NHWC'
10 |     NCL = 'NCL'
11 |     NLC = 'NLC'
12 | 
13 | 
14 | FormatT = Union[str, Format]
15 | 
16 | 
def get_spatial_dim(fmt: FormatT):
    """Return the tuple of spatial axis indices for layout ``fmt``.

    ``fmt`` is normalised with ``Format(fmt)``, so a value that is not a
    valid layout raises ``ValueError``.
    """
    layout = Format(fmt)
    known_axes = {
        Format.NLC: (1,),
        Format.NCL: (2,),
        Format.NHWC: (1, 2),
    }
    # The only layout not listed is NCHW, whose spatial axes come last.
    return known_axes.get(layout, (2, 3))
28 | 
29 | 
def get_channel_dim(fmt: FormatT):
    """Return the channel axis index for layout ``fmt``.

    ``fmt`` is normalised with ``Format(fmt)``, so a value that is not a
    valid layout raises ``ValueError``.
    """
    layout = Format(fmt)
    if layout is Format.NHWC:
        return 3
    if layout is Format.NLC:
        return 2
    # NCHW and NCL both keep channels right after the batch axis.
    return 1
39 | 
40 | 
def nchw_to(x, fmt: Format):
    """Convert a rank-4 NCHW tensor to the layout ``fmt``.

    Args:
        x: Tensor laid out as (N, C, H, W).
        fmt: Target layout. NHWC transposes, NLC/NCL merge the two spatial
            axes into one; any other value returns ``x`` unchanged.

    Returns:
        The re-laid-out tensor.

    Raises:
        ValueError: For NLC/NCL when ``x.shape`` cannot be unpacked into
            four dimensions.
    """
    if fmt == Format.NHWC:
        return tf.transpose(x, (0, 2, 3, 1))
    if fmt == Format.NLC or fmt == Format.NCL:
        # Unpacking also enforces that x has exactly four dimensions.
        batch, channels, _, _ = x.shape
        # Static dims can be None (e.g. the batch axis while tracing a
        # tf.function); tf.reshape rejects None, so use the runtime size.
        if batch is None:
            batch = tf.shape(x)[0]
        if channels is None:
            channels = tf.shape(x)[1]
        x = tf.reshape(x, (batch, channels, -1))
        if fmt == Format.NLC:
            x = tf.transpose(x, (0, 2, 1))
    return x
51 | 
52 | 
def nhwc_to(x, fmt: Format):
    """Convert a rank-4 NHWC tensor to the layout ``fmt``.

    Args:
        x: Tensor laid out as (N, H, W, C).
        fmt: Target layout. NCHW transposes, NLC/NCL merge the two spatial
            axes into one; any other value returns ``x`` unchanged.

    Returns:
        The re-laid-out tensor.

    Raises:
        ValueError: For NLC/NCL when ``x.shape`` cannot be unpacked into
            four dimensions.
    """
    if fmt == Format.NCHW:
        return tf.transpose(x, (0, 3, 1, 2))
    if fmt == Format.NLC or fmt == Format.NCL:
        # Unpacking also enforces that x has exactly four dimensions.
        batch, _, _, channels = x.shape
        # Static dims can be None (e.g. the batch axis while tracing a
        # tf.function); tf.reshape rejects None, so use the runtime size.
        if batch is None:
            batch = tf.shape(x)[0]
        if channels is None:
            channels = tf.shape(x)[3]
        x = tf.reshape(x, (batch, -1, channels))
        if fmt == Format.NCL:
            x = tf.transpose(x, (0, 2, 1))
    return x


--------------------------------------------------------------------------------
/Note/nn/layer/gaussian_dropout.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note import nn
 3 | 
 4 | 
 5 | class gaussian_dropout:
 6 |     """Apply multiplicative 1-centered Gaussian noise.
 7 | 
 8 |     As it is a regularization layer, it is only active at training time.
 9 | 
10 |     Args:
11 |       rate: Float, drop probability (as with `Dropout`).
12 |         The multiplicative noise will have
13 |         standard deviation `sqrt(rate / (1 - rate))`.
14 |       seed: Integer, optional random seed to enable deterministic behavior.
15 | 
16 |     Call arguments:
17 |       inputs: Input tensor (of any rank).
18 |       training: Python boolean indicating whether the layer should behave in
19 |         training mode (adding dropout) or in inference mode (doing nothing).
20 | 
21 |     Input shape:
22 |       Arbitrary. Use the keyword argument `input_shape`
23 |       (tuple of integers, does not include the samples axis)
24 |       when using this layer as the first layer in a model.
25 | 
26 |     Output shape:
27 |       Same shape as input.
28 |     """
29 | 
30 |     def __init__(self, rate, seed=7):
31 |         self.rate = rate
32 |         self.seed = seed
33 |         self.random_generator = tf.random.Generator.from_seed(self.seed)
34 |         self.train_flag = True
35 |         nn.Model.layer_list.append(self)
36 |         if nn.Model.name!=None and nn.Model.name not in nn.Model.layer_eval:
37 |             nn.Model.layer_eval[nn.Model.name]=[]
38 |             nn.Model.layer_eval[nn.Model.name].append(self)
39 |         elif nn.Model.name!=None:
40 |             nn.Model.layer_eval[nn.Model.name].append(self)
41 | 
42 |     def __call__(self, data, training=None):
43 |         if training==None:
44 |             training=self.train_flag
45 |         if 0 < self.rate < 1:
46 |             def noised():
47 |                 stddev = tf.math.sqrt(self.rate / (1.0 - self.rate))
48 |                 return data * self.random_generator.normal(
49 |                     shape=tf.shape(data),
50 |                     mean=1.0,
51 |                     stddev=stddev,
52 |                     dtype=data.dtype,
53 |                 )
54 | 
55 |             return tf.cond(training, noised, lambda: data)
56 |         return data
57 | 


--------------------------------------------------------------------------------
/Note/nn/layer/gaussian_noise.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | class gaussian_noise:
 5 |     """Apply additive zero-centered Gaussian noise.
 6 | 
 7 |     This is useful to mitigate overfitting
 8 |     (you could see it as a form of random data augmentation).
 9 |     Gaussian Noise (GS) is a natural choice as corruption process
10 |     for real valued inputs.
11 | 
12 |     As it is a regularization layer, it is only active at training time.
13 | 
14 |     Args:
15 |       stddev: Float, standard deviation of the noise distribution.
16 |       seed: Integer, optional random seed to enable deterministic behavior.
17 | 
18 |     Call arguments:
19 |       inputs: Input tensor (of any rank).
20 |       training: Python boolean indicating whether the layer should behave in
21 |         training mode (adding noise) or in inference mode (doing nothing).
22 | 
23 |     Input shape:
24 |       Arbitrary. Use the keyword argument `input_shape`
25 |       (tuple of integers, does not include the samples axis)
26 |       when using this layer as the first layer in a model.
27 | 
28 |     Output shape:
29 |       Same shape as input.
30 |     """
31 | 
32 |     def __init__(self, stddev, seed=7):
33 |         self.stddev = stddev
34 |         self.seed = seed
35 |         self.random_generator = tf.random.Generator.from_seed(self.seed)
36 | 
37 |     def __call__(self, data, train_flag=True):
38 |         def noised():
39 |             return data + self.random_generator.normal(
40 |                                 shape=tf.shape(data),
41 |                                 mean=0.0,
42 |                                 stddev=self.stddev,
43 |                                 dtype=data.dtype,
44 |                             )
45 | 
46 |         return tf.cond(train_flag, lambda: noised(), lambda: data)


--------------------------------------------------------------------------------
/Note/nn/layer/global_avg_pool1d.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | class global_avg_pool1d:
 5 |     def __init__(self,keepdims=False):
 6 |         self.keepdims=keepdims
 7 |     
 8 |     
 9 |     def __call__(self,data):
10 |         return tf.reduce_mean(data,[1],keepdims=self.keepdims)


--------------------------------------------------------------------------------
/Note/nn/layer/global_avg_pool2d.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | class global_avg_pool2d:
 5 |     def __init__(self,keepdims=False):
 6 |         self.keepdims=keepdims
 7 |     
 8 |     
 9 |     def __call__(self,data):
10 |         return tf.reduce_mean(data,[1,2],keepdims=self.keepdims)


--------------------------------------------------------------------------------
/Note/nn/layer/global_avg_pool3d.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | class global_avg_pool3d:
 5 |     def __init__(self,keepdims=False):
 6 |         self.keepdims=keepdims
 7 |         
 8 |     
 9 |     def __call__(self,data):
10 |         return tf.reduce_mean(data,[1,2,3],keepdims=self.keepdims)


--------------------------------------------------------------------------------
/Note/nn/layer/global_context.py:
--------------------------------------------------------------------------------
 1 | """ Global Context Attention Block
 2 | 
 3 | Paper: `GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond`
 4 |     - https://arxiv.org/abs/1904.11492
 5 | 
 6 | Official code consulted as reference: https://github.com/xvjiarui/GCNet
 7 | 
 8 | Hacked together by / Copyright 2024 NoteDance
 9 | """
10 | import tensorflow as tf
11 | from Note import nn
12 | 
13 | 
class GlobalContext:
    """Global Context attention block (GCNet).

    A per-sample context vector is pooled from the input and then fused
    back into it through a gated scale branch and/or an additive branch.

    Args:
        channels: Number of input channels.
        use_attn: When true, the context is pooled with softmax attention
            weights from a 1x1 conv; otherwise a plain spatial mean is used.
        fuse_add: Create the additive ``nn.ConvMlp`` branch.
        fuse_scale: Create the multiplicative (gated) ``nn.ConvMlp`` branch.
        init_last_zero: Stored on the instance; not otherwise used here.
        rd_ratio: Used with ``rd_divisor`` to derive ``rd_channels`` through
            ``nn.make_divisible`` when ``rd_channels`` is None.
        rd_channels: Second positional argument given to ``nn.ConvMlp``.
        rd_divisor: Divisor passed to ``nn.make_divisible``.
        act_layer: Activation passed to ``nn.ConvMlp``.
        gate_layer: Callable applied to the scale branch output.
    """

    def __init__(self, channels, use_attn=True, fuse_add=False, fuse_scale=True, init_last_zero=False,
                 rd_ratio=1./8, rd_channels=None, rd_divisor=1, act_layer=tf.nn.relu, gate_layer=tf.nn.sigmoid):
        self.conv_attn = nn.conv2d(1, kernel_size=1, input_size=channels, use_bias=True) if use_attn else None

        if rd_channels is None:
            rd_channels = nn.make_divisible(channels * rd_ratio, rd_divisor, round_limit=0.)
        if fuse_add:
            self.mlp_add = nn.ConvMlp(channels, rd_channels, act_layer=act_layer, norm_layer=nn.layer_norm)
        else:
            self.mlp_add = None
        if fuse_scale:
            self.mlp_scale = nn.ConvMlp(channels, rd_channels, act_layer=act_layer, norm_layer=nn.layer_norm)
        else:
            self.mlp_scale = None

        self.gate = gate_layer
        self.init_last_zero = init_last_zero
        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise the attention conv and zero the additive branch's last weight."""
        if self.conv_attn is not None:
            nn.kaiming_normal_(self.conv_attn.weight, mode='fan_in', nonlinearity='relu')
        if self.mlp_add is not None:
            self.mlp_add.fc2.weight.assign(tf.zeros(self.mlp_add.fc2.weight.shape))

    def __call__(self, x):
        """Apply the block to ``x``, whose shape is unpacked as (B, H, W, C).

        H, W and C must be statically known; the batch size may be unknown
        because every reshape lets TensorFlow infer it with ``-1``.
        """
        _, H, W, C = x.shape

        if self.conv_attn is not None:
            # Attention logits over all spatial positions: (B, 1, H * W).
            attn = tf.reshape(self.conv_attn(x), (-1, 1, H * W))
            attn = tf.expand_dims(tf.nn.softmax(attn, axis=-1), axis=3)  # (B, 1, H * W, 1)
            # Channels-first flattening of x: (B, 1, C, H * W).
            x_flat = tf.reshape(tf.transpose(x, (0, 3, 1, 2)), (-1, C, H * W))
            # Attention-weighted sum over positions: (B, 1, C, 1).
            context = tf.matmul(tf.expand_dims(x_flat, axis=1), attn)
            context = tf.reshape(context, (-1, 1, 1, C))
        else:
            context = tf.reduce_mean(x, axis=(1, 2), keepdims=True)

        if self.mlp_scale is not None:
            mlp_x = self.mlp_scale(context)
            x = x * self.gate(mlp_x)
        if self.mlp_add is not None:
            mlp_x = self.mlp_add(context)
            x = x + mlp_x

        return x


--------------------------------------------------------------------------------
/Note/nn/layer/global_max_pool1d.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | class global_max_pool1d:
 5 |     def __init__(self,keepdims=False):
 6 |         self.keepdims=keepdims
 7 |     
 8 |     
 9 |     def __call__(self,data):
10 |         return tf.reduce_max(data,[1],keepdims=self.keepdims)


--------------------------------------------------------------------------------
/Note/nn/layer/global_max_pool2d.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | class global_max_pool2d:
 5 |     def __init__(self,keepdims=False):
 6 |         self.keepdims=keepdims
 7 |     
 8 |     
 9 |     def __call__(self,data):
10 |         return tf.reduce_max(data,[1,2],keepdims=self.keepdims)


--------------------------------------------------------------------------------
/Note/nn/layer/global_max_pool3d.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | class global_max_pool3d:
 5 |     def __init__(self,keepdims=False):
 6 |         self.keepdims=keepdims
 7 |         
 8 |     
 9 |     def __call__(self,data):
10 |         return tf.reduce_max(data,[1,2,3],keepdims=self.keepdims)


--------------------------------------------------------------------------------
/Note/nn/layer/grn.py:
--------------------------------------------------------------------------------
 1 | """ Global Response Normalization Module
 2 | 
 3 | Based on the GRN layer presented in
 4 | `ConvNeXt-V2 - Co-designing and Scaling ConvNets with Masked Autoencoders` - https://arxiv.org/abs/2301.00808
 5 | 
 6 | This implementation
 7 | * works for both NCHW and NHWC tensor layouts
 8 | * uses affine param names matching existing torch norm layers
 9 | * slightly improves eager mode performance via fused addcmul
10 | 
11 | Hacked together by / Copyright 2024 NoteDance
12 | """
13 | 
14 | import tensorflow as tf
15 | from Note import nn
16 | 
17 | 
class GlobalResponseNorm:
    """ Global Response Normalization layer

    Args:
        dim: Size of the learnable ``weight`` and ``bias`` vectors.
        eps: Added to the channel-mean of the norms before dividing.
        channels_last: Selects which axes are treated as spatial and which
            as channel: (1, 2) / -1 when true, (2, 3) / 1 otherwise.
    """
    def __init__(self, dim, eps=1e-6, channels_last=True):
        self.eps = eps
        if channels_last:
            self.spatial_dim = (1, 2)
            self.channel_dim = -1
            self.wb_shape = (1, 1, 1, -1)
        else:
            self.spatial_dim = (2, 3)
            self.channel_dim = 1
            self.wb_shape = (1, -1, 1, 1)

        # Both start at zero, so the layer initially returns its input.
        self.weight = nn.Parameter(tf.zeros(dim))
        self.bias = nn.Parameter(tf.zeros(dim))

    def __call__(self, x):
        """Return ``x + bias + weight * (x * x_n)``, with ``x_n`` the normalised per-channel response."""
        # 'euclidean' over a 2-tuple of axes is the Frobenius norm, i.e. the
        # square root of the sum of squares over all spatial positions.
        # ord=2 with two axes would instead make tf.norm compute the matrix
        # spectral norm (largest singular value, via SVD).
        x_g = tf.norm(x, ord='euclidean', axis=self.spatial_dim, keepdims=True)
        # Divide each channel's norm by the mean norm across channels.
        x_n = x_g / (tf.reduce_mean(x_g, axis=self.channel_dim, keepdims=True) + self.eps)
        bias_reshaped = tf.reshape(self.bias, self.wb_shape)
        weight_reshaped = tf.reshape(self.weight, self.wb_shape)
        product = tf.multiply(x, x_n)
        weighted_product = tf.multiply(weight_reshaped, product)
        return x + tf.add(bias_reshaped, weighted_product)


--------------------------------------------------------------------------------
/Note/nn/layer/identity.py:
--------------------------------------------------------------------------------
class identity:
    """Layer that returns its input unchanged.

    Args:
        input_size: Optional size recorded on the instance. When given,
            ``output_size`` is set to the same value; when omitted, the
            ``output_size`` attribute is not created.
    """
    def __init__(self,input_size=None):
        self.input_size=input_size
        # Identity check rather than `!=`, so array-like sizes are not
        # compared element by element.
        if input_size is not None:
            self.output_size=input_size


    def __call__(self,data):
        """Return ``data`` as is."""
        return data


--------------------------------------------------------------------------------
/Note/nn/layer/image_preprocessing/center_crop.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
# Height and width axes, counted from the end of the shape.
# NOTE(review): presumably assumes a channels-last (..., H, W, C) layout so
# the same indices fit batched and unbatched images — confirm against callers.
H_AXIS = -3
W_AXIS = -2
 6 | 
 7 | 
 8 | class center_crop:
 9 |     def __init__(self, height, width, dtype='float32'):
10 |         self.height = height
11 |         self.width = width
12 |         self.compute_dtype=dtype
13 | 
14 | 
15 |     def __call__(self, data):
16 |         data = tf.cast(data, self.compute_dtype)
17 |         input_shape = tf.shape(data)
18 |         h_diff = input_shape[H_AXIS] - self.height
19 |         w_diff = input_shape[W_AXIS] - self.width
20 | 
21 |         def center_crop():
22 |             h_start = tf.cast(h_diff / 2, tf.int32)
23 |             w_start = tf.cast(w_diff / 2, tf.int32)
24 |             return tf.image.crop_to_bounding_box(
25 |                 data, h_start, w_start, self.height, self.width
26 |             )
27 | 
28 |         def upsize():
29 |             outputs = tf.image.resize(
30 |                 data, [self.height, self.width], method=tf.image.ResizeMethod.BICUBIC
31 |             )
32 |             # resize will always output float32, so we need to re-cast.
33 |             return tf.cast(outputs, self.compute_dtype)
34 | 
35 |         return tf.cond(
36 |             tf.reduce_all((h_diff >= 0, w_diff >= 0)), center_crop, upsize
37 |         )
38 | 
39 | 
40 |     def compute_output_shape(self, input_shape):
41 |         input_shape = tf.TensorShape(input_shape).as_list()
42 |         input_shape[H_AXIS] = self.height
43 |         input_shape[W_AXIS] = self.width
44 |         return tf.TensorShape(input_shape) 


--------------------------------------------------------------------------------
/Note/nn/layer/image_preprocessing/random_crop.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
# Height and width axes, counted from the end of the shape so that the same
# indices fit both 3D and 4D `(..., height, width, channels)` inputs.
H_AXIS = -3
W_AXIS = -2
 5 | 
 6 | class random_crop:
 7 |     """A preprocessing layer which randomly crops images during training.
 8 | 
 9 |     During training, this layer will randomly choose a location to crop images
10 |     down to a target size. The layer will crop all the images in the same batch
11 |     to the same cropping location.
12 | 
13 |     At inference time, and during training if an input image is smaller than the
14 |     target size, the input will be resized and cropped so as to return the
15 |     largest possible window in the image that matches the target aspect ratio.
16 |     If you need to apply random cropping at inference time, set `training` to
17 |     True when calling the layer.
18 | 
19 |     Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`) and
20 |     of integer or floating point dtype. By default, the layer will output
21 |     floats.
22 | 
23 |     For an overview and full list of preprocessing layers, see the preprocessing
24 |     [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers).
25 | 
26 |     Input shape:
27 |         3D (unbatched) or 4D (batched) tensor with shape:
28 |         `(..., height, width, channels)`, in `"channels_last"` format.
29 | 
30 |     Output shape:
31 |         3D (unbatched) or 4D (batched) tensor with shape:
32 |         `(..., target_height, target_width, channels)`.
33 | 
34 |     Args:
35 |         height: Integer, the height of the output shape.
36 |         width: Integer, the width of the output shape.
37 |         seed: Integer. Used to create a random seed.
38 |     """
39 | 
40 |     def __init__(self, height, width, seed=7):
41 |         self.height = height
42 |         self.width = width
43 |         self.seed = seed
44 |         self.random_generator = tf.random.Generator.from_seed(seed)
45 | 
46 |     def __call__(self, data, train_flag=True):
47 |         input_shape = tf.shape(data)
48 |         h_diff = input_shape[H_AXIS] - self.height
49 |         w_diff = input_shape[W_AXIS] - self.width
50 | 
51 |         def random_crop():
52 |             dtype = input_shape.dtype
53 |             rands = self.random_generator.uniform(
54 |                 [2], 0, dtype.max, dtype
55 |             )
56 |             h_start = rands[0] % (h_diff + 1)
57 |             w_start = rands[1] % (w_diff + 1)
58 |             return tf.image.crop_to_bounding_box(
59 |                 data, h_start, w_start, self.height, self.width
60 |             )
61 | 
62 |         def resize():
63 |             outputs = tf.image.resize(data, [self.height, self.width], method=tf.image.ResizeMethod.BILINEAR )
64 |             # smart_resize will always output float32, so we need to re-cast.
65 |             return tf.cast(outputs, data.dtype)
66 | 
67 |         return tf.cond(
68 |             tf.reduce_all((train_flag, h_diff >= 0, w_diff >= 0)),
69 |             random_crop,
70 |             resize,
71 |         )


--------------------------------------------------------------------------------
/Note/nn/layer/image_preprocessing/rescaling.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | class rescaling:
 5 |     """A preprocessing layer which rescales input values to a new range.
 6 | 
 7 |     This layer rescales every value of an input (often an image) by multiplying
 8 |     by `scale` and adding `offset`.
 9 | 
10 |     For instance:
11 | 
12 |     1. To rescale an input in the `[0, 255]` range
13 |     to be in the `[0, 1]` range, you would pass `scale=1./255`.
14 | 
15 |     2. To rescale an input in the `[0, 255]` range to be in the `[-1, 1]` range,
16 |     you would pass `scale=1./127.5, offset=-1`.
17 | 
18 |     The rescaling is applied both during training and inference. Inputs can be
19 |     of integer or floating point dtype, and by default the layer will output
20 |     floats.
21 | 
22 |     For an overview and full list of preprocessing layers, see the preprocessing
23 |     [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers).
24 | 
25 |     Input shape:
26 |         Arbitrary.
27 | 
28 |     Output shape:
29 |         Same as input.
30 | 
31 |     Args:
32 |         scale: Float, the scale to apply to the inputs.
33 |         offset: Float, the offset to apply to the inputs.
34 |     """
35 | 
36 |     def __init__(self, scale, offset=0.0):
37 |         self.scale = scale
38 |         self.offset = offset
39 | 
40 |     def __call__(self, data):
41 |         dtype = data.dtype
42 |         scale = tf.cast(self.scale, dtype)
43 |         offset = tf.cast(self.offset, dtype)
44 |         return tf.cast(data, dtype) * scale + offset


--------------------------------------------------------------------------------
/Note/nn/layer/interpolate.py:
--------------------------------------------------------------------------------
 1 | """ Interpolation helpers for Note layer
 2 | 
 3 | RegularGridInterpolator from https://github.com/sbarratt/torch_interpolations
 4 | Copyright NoteDance, Apache 2.0 license
 5 | """
 6 | import tensorflow as tf
 7 | from itertools import product
 8 | 
 9 | 
class RegularGridInterpolator:
    """ Interpolate data defined on a rectilinear grid with even or uneven spacing.
    Produces similar results to scipy RegularGridInterpolator or interp2d
    in 'linear' mode.

    Taken from https://github.com/sbarratt/torch_interpolations

    Args:
        points: Tuple or list of 1-D tensors, one per grid axis, holding the
            grid coordinates along that axis.
        values: Tensor of grid values; axis ``i`` must have the same length
            as ``points[i]``.
    """

    def __init__(self, points, values):
        self.points = points
        self.values = values

        assert isinstance(self.points, tuple) or isinstance(self.points, list)
        assert isinstance(self.values, tf.Tensor)

        self.ms = list(self.values.shape)
        self.n = len(self.points)

        # One coordinate vector per axis of `values`.
        assert len(self.ms) == self.n

        for i, p in enumerate(self.points):
            assert isinstance(p, tf.Tensor)
            assert p.shape[0] == self.values.shape[i]

    def __call__(self, points_to_interp):
        """Interpolate the grid at the given query coordinates.

        Args:
            points_to_interp: One 1-D tensor per grid axis, all of the same
                length K; entry ``k`` across the tensors forms query point k.

        Returns:
            Tensor of K interpolated values.
        """
        assert self.points is not None
        assert self.values is not None

        assert len(points_to_interp) == len(self.points)
        K = points_to_interp[0].shape[0]
        for x in points_to_interp:
            assert x.shape[0] == K

        idxs = []
        dists = []
        overalls = []
        for p, x in zip(self.points, points_to_interp):
            last = p.shape[0] - 1
            # Bracketing grid indices, clamped to the grid. tf tensors are
            # immutable, so clamping uses functional ops rather than masked
            # assignment.
            idx_right = tf.minimum(tf.searchsorted(p, x, side="right"), last)
            idx_left = tf.clip_by_value(idx_right - 1, 0, last)
            # Distances to both neighbours; negative values (queries outside
            # the grid) are clamped to zero.
            dist_left = tf.maximum(x - tf.gather(p, idx_left), 0.)
            dist_right = tf.maximum(tf.gather(p, idx_right) - x, 0.)
            # If both distances vanish, give the neighbours equal weight so
            # the denominator below stays non-zero.
            both_zero = tf.logical_and(tf.equal(dist_left, 0), tf.equal(dist_right, 0))
            ones = tf.ones_like(dist_left)
            dist_left = tf.where(both_zero, ones, dist_left)
            dist_right = tf.where(both_zero, ones, dist_right)

            idxs.append((idx_left, idx_right))
            dists.append((dist_left, dist_right))
            overalls.append(dist_left + dist_right)

        numerator = 0.
        # Visit every corner of the enclosing cell: 0 = left, 1 = right.
        for indexer in product([0, 1], repeat=self.n):
            corner_idx = [idx[onoff] for onoff, idx in zip(indexer, idxs)]
            # A corner is weighted by the distance to the opposite side.
            corner_weights = [dist[1 - onoff] for onoff, dist in zip(indexer, dists)]
            # Stack per-axis indices into (K, n) coordinates for gather_nd.
            corner_vals = tf.gather_nd(self.values, tf.stack(corner_idx, axis=-1))
            numerator += corner_vals * \
                tf.reduce_prod(tf.stack(corner_weights), axis=0)
        denominator = tf.reduce_prod(tf.stack(overalls), axis=0)
        return numerator / denominator


--------------------------------------------------------------------------------
/Note/nn/layer/layer_scale.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note import nn
 3 | 
 4 | 
 5 | class LayerScale:
 6 |     """ LayerScale on tensors with channels in last-dim.
 7 |     """
 8 |     def __init__(
 9 |             self,
10 |             dim: int,
11 |             init_values: float = 1e-5,
12 |     ) -> None:
13 |         self.gamma = nn.Parameter(init_values * tf.ones(dim))
14 | 
15 |     def __call__(self, x):
16 |         return x * self.gamma
17 | 
18 | 
class LayerScale2d:
    """Multiply a 4-D input by a learnable vector ``gamma`` along its last axis.

    ``gamma`` is reshaped to ``(1, 1, 1, dim)`` before the multiplication.
    """

    def __init__(self, dim: int, init_values: float = 1e-5):
        # Every entry of gamma starts at init_values.
        initial_gamma = init_values * tf.ones(dim)
        self.gamma = nn.Parameter(initial_gamma)

    def __call__(self, x):
        broadcast_gamma = tf.reshape(self.gamma, (1, 1, 1, -1))
        return x * broadcast_gamma
32 | 


--------------------------------------------------------------------------------
/Note/nn/layer/lp_pool1d.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note import nn
 3 | from typing import Union
 4 | 
 5 | 
 6 | class lp_pool1d:
 7 |     r"""Apply a 1D power-average pooling over an input signal composed of several input planes.
 8 | 
 9 |     If the sum of all inputs to the power of `p` is
10 |     zero, the gradient is set to zero as well.
11 | 
12 |     """
13 |     def __init__(self, norm_type: Union[int, float], kernel_size, strides = None):
14 |         self.norm_type = norm_type
15 |         self.kernel_size = kernel_size
16 |         self.strides = strides
17 |         if strides is not None:
18 |             self.avg_pool1d = nn.avg_pool1d(kernel_size, strides, 0)
19 |         else:
20 |             self.avg_pool1d = nn.avg_pool1d(kernel_size, padding=0)
21 |     
22 |     def __call__(self, input):
23 |         if self.strides is not None:
24 |             out = self.avg_pool1d(tf.pow(input, self.norm_type))
25 |         else:
26 |             out = self.avg_pool1d(tf.pow(input, self.norm_type))
27 |     
28 |         return tf.pow((tf.sign(out) * tf.nn.relu(tf.abs(out))) * self.kernel_size, (1.0 / self.norm_type))


--------------------------------------------------------------------------------
/Note/nn/layer/lp_pool2d.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note import nn
 3 | from typing import Union
 4 | import collections
 5 | from itertools import repeat
 6 | 
 7 | 
 8 | def _ntuple(n, name="parse"):
 9 |     def parse(x):
10 |         if isinstance(x, collections.abc.Iterable):
11 |             return tuple(x)
12 |         return tuple(repeat(x, n))
13 | 
14 |     parse.__name__ = name
15 |     return parse
16 | 
17 | 
18 | _pair = _ntuple(2, "_pair")
19 | 
20 | 
class lp_pool2d:
    r"""2D power-average (Lp) pooling.

    The input is raised to ``norm_type``, average-pooled, multiplied by the
    window area to turn the average into a sum, and raised to
    ``1 / norm_type``.

    Args:
        norm_type: The exponent ``p``.
        kernel_size: Pooling window size; a scalar is used for both axes.
        strides: Optional stride handed to ``nn.avg_pool2d``; when None the
            pooling layer is built without an explicit stride.
    """
    def __init__(self, norm_type: Union[int, float], kernel_size, strides = None):
        self.norm_type = norm_type
        self.kernel_size = kernel_size
        self.strides = strides
        if strides is None:
            self.avg_pool2d = nn.avg_pool2d(kernel_size, padding=0)
        else:
            self.avg_pool2d = nn.avg_pool2d(kernel_size, strides, 0)

    def __call__(self, input):
        kw, kh = _pair(self.kernel_size)
        # The stride choice is already baked into self.avg_pool2d.
        pooled = self.avg_pool2d(tf.pow(input, self.norm_type))
        window_sum = (tf.sign(pooled) * tf.nn.relu(tf.abs(pooled))) * (kw * kh)
        return tf.pow(window_sum, (1.0 / self.norm_type))


--------------------------------------------------------------------------------
/Note/nn/layer/lp_pool3d.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note import nn
 3 | from typing import Union
 4 | import collections
 5 | from itertools import repeat
 6 | 
 7 | 
 8 | def _ntuple(n, name="parse"):
 9 |     def parse(x):
10 |         if isinstance(x, collections.abc.Iterable):
11 |             return tuple(x)
12 |         return tuple(repeat(x, n))
13 | 
14 |     parse.__name__ = name
15 |     return parse
16 | 
17 | 
18 | _triple = _ntuple(3, "_triple")
19 | 
20 | 
class lp_pool3d:
    r"""3D power-average (Lp) pooling.

    The input is raised to ``norm_type``, average-pooled, multiplied by the
    window volume to turn the average into a sum, and raised to
    ``1 / norm_type``.

    Args:
        norm_type: The exponent ``p``.
        kernel_size: Pooling window size; a scalar is used for all three axes.
        strides: Optional stride handed to ``nn.avg_pool3d``; when None the
            pooling layer is built without an explicit stride.
    """
    def __init__(self, norm_type: Union[int, float], kernel_size, strides = None):
        self.norm_type = norm_type
        self.kernel_size = kernel_size
        self.strides = strides
        if strides is None:
            self.avg_pool3d = nn.avg_pool3d(kernel_size, padding=0)
        else:
            self.avg_pool3d = nn.avg_pool3d(kernel_size, strides, 0)

    def __call__(self, input):
        kd, kw, kh = _triple(self.kernel_size)
        # The stride choice is already baked into self.avg_pool3d.
        pooled = self.avg_pool3d(tf.pow(input, self.norm_type))
        window_sum = (tf.sign(pooled) * tf.nn.relu(tf.abs(pooled))) * (kd * kw * kh)
        return tf.pow(window_sum, (1.0 / self.norm_type))


--------------------------------------------------------------------------------
/Note/nn/layer/masked_softmax.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | def _large_compatible_negative(tensor_type):
 5 |   """Large negative number as Tensor.
 6 | 
 7 |   This function is necessary because the standard value for epsilon
 8 |   in this module (-1e9) cannot be represented using `tf.float16`.
 9 | 
10 |   Args:
11 |     tensor_type: A dtype to determine the type.
12 | 
13 |   Returns:
14 |     A large negative number.
15 |   """
16 |   if tensor_type == tf.float16:
17 |     return tf.float16.min
18 |   return -1e9
19 | 
20 | 
class masked_softmax:
  """Softmax over one or more axes with an optional mask.

  Args:
    mask_expansion_axes: Axis passed to `tf.expand_dims` each time the mask
      needs another dimension to reach the rank of the scores.
    normalization_axes: Axes the softmax normalizes over; defaults to the
      last axis.
  """

  def __init__(self,
               mask_expansion_axes=None,
               normalization_axes=None,
               ):
    self._mask_expansion_axes = mask_expansion_axes
    self._normalization_axes = (
        (-1,) if normalization_axes is None else normalization_axes)


  def __call__(self, scores, mask=None):
    if mask is not None:
      # Pad the mask with new axes until its rank matches the scores.
      missing_dims = len(scores.shape) - len(mask.shape)
      for _ in range(missing_dims):
        mask = tf.expand_dims(mask, axis=self._mask_expansion_axes)

      # The mask holds 1.0 where attention is allowed and 0.0 elsewhere, so
      # (1 - mask) * large_negative is 0.0 for allowed positions and a large
      # negative value for masked ones.
      keep = tf.cast(mask, scores.dtype)
      penalty = (1.0 - keep) * _large_compatible_negative(scores.dtype)
      # Added before the softmax, the penalty effectively removes the masked
      # positions from the result.
      scores += penalty

    if len(self._normalization_axes) == 1:
      return tf.nn.softmax(scores, axis=self._normalization_axes[0])

    # Multi-axis softmax expressed as exp(scores - logsumexp(scores)).
    log_norm = tf.math.reduce_logsumexp(
        scores, axis=self._normalization_axes, keepdims=True)
    return tf.math.exp(scores - log_norm)


--------------------------------------------------------------------------------
/Note/nn/layer/masking.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | def masking(inputs,mask_value=0.0,mask_mode="zero"):
 5 |     inputs=tf.convert_to_tensor(inputs)
 6 |     dtype=inputs.dtype
 7 |     mask=tf.equal(inputs,mask_value)
 8 |     mask=tf.cast(mask,dtype)
 9 |     mask=tf.broadcast_to(mask,tf.shape(inputs))
10 |     if mask_mode=="zero":
11 |         extreme_value=0
12 |     elif mask_mode=="min":
13 |         if dtype==tf.float32:
14 |             extreme_value=tf.float32.min
15 |         elif dtype==tf.float64:
16 |             extreme_value=tf.float64.min
17 |         elif dtype==tf.int32:
18 |             extreme_value=tf.int32.min
19 |         elif dtype==tf.int64:
20 |             extreme_value=tf.int64.min
21 |         else:
22 |             raise ValueError("Unsupported dtype: {}".format(dtype))
23 |     elif mask_mode=="max":
24 |         if dtype==tf.float32:
25 |             extreme_value=tf.float32.max
26 |         elif dtype==tf.float64:
27 |             extreme_value=tf.float64.max
28 |         elif dtype==tf.int32:
29 |             extreme_value=tf.int32.max
30 |         elif dtype==tf.int64:
31 |             extreme_value=tf.int64.max
32 |         else:
33 |             raise ValueError("Unsupported dtype: {}".format(dtype))
34 |     else:
35 |         raise ValueError("Invalid mask mode: {}".format(mask_mode))
36 |     outputs=inputs*(1-mask)+mask*extreme_value
37 |     return outputs,mask
38 | 


--------------------------------------------------------------------------------
/Note/nn/layer/matmul_with_margin.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from typing import Tuple
 3 | 
 4 | class matmul_with_margin:
 5 |   """This layer computs a dot product matrix given two encoded inputs.
 6 | 
 7 |   Args:
 8 |     logit_scale: The scaling factor of dot products when doing training.
 9 |     logit_margin: The margin value between the positive and negative examples
10 |       when doing training.
11 |   """
12 | 
13 |   def __init__(self,
14 |                logit_scale=1.0,
15 |                logit_margin=0.0,
16 |                ):
17 |     self.logit_scale = logit_scale
18 |     self.logit_margin = logit_margin
19 | 
20 |   def __call__(self, left_encoded: tf.Tensor,
21 |            right_encoded: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]:
22 |     batch_size = left_encoded[0]
23 | 
24 |     # Left -> Right dot product.
25 |     left_dot_products = tf.matmul(
26 |         left_encoded, right_encoded, transpose_b=True)
27 | 
28 |     self.left_logits = self.logit_scale * (
29 |         left_dot_products - self.logit_margin * tf.eye(batch_size))
30 | 
31 |     # Right -> Left dot product.
32 |     self.right_logits = tf.transpose(self.left_logits)
33 | 
34 |     return (self.left_logits, self.right_logits)


--------------------------------------------------------------------------------
/Note/nn/layer/max_pool1d.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note import nn
 3 | 
 4 | 
 5 | class max_pool1d:
 6 |     def __init__(self,kernel_size=2,strides=None,padding=0):
 7 |         self.kernel_size=kernel_size
 8 |         self.strides=strides if strides!=None else kernel_size
 9 |         self.padding=padding
10 |         if not isinstance(padding,str):
11 |             self.zeropadding1d=nn.zeropadding1d(padding=padding)
12 |     
13 |     
14 |     def __call__(self,data):
15 |         if not isinstance(self.padding,str):
16 |             data=self.zeropadding1d(data)
17 |             padding='VALID'
18 |         else:
19 |             padding=self.padding
20 |         return tf.nn.max_pool1d(data,ksize=self.kernel_size,strides=self.strides,padding=padding)
21 | 


--------------------------------------------------------------------------------
/Note/nn/layer/max_pool2d.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note import nn
 3 | 
 4 | 
 5 | class max_pool2d:
 6 |     def __init__(self,kernel_size=(2,2),strides=None,padding=0):
 7 |         self.kernel_size=kernel_size
 8 |         self.strides=strides if strides!=None else kernel_size
 9 |         self.padding=padding
10 |         if not isinstance(padding,str):
11 |             self.zeropadding2d=nn.zeropadding2d(padding=padding)
12 |     
13 |     
14 |     def __call__(self,data):
15 |         if not isinstance(self.padding,str):
16 |             data=self.zeropadding2d(data)
17 |             padding='VALID'
18 |         else:
19 |             padding=self.padding
20 |         return tf.nn.max_pool2d(data,ksize=self.kernel_size,strides=self.strides,padding=padding)
21 | 


--------------------------------------------------------------------------------
/Note/nn/layer/max_pool3d.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note import nn
 3 | 
 4 | 
 5 | class max_pool3d:
 6 |     def __init__(self,kernel_size=(2,2,2),strides=None,padding=0):
 7 |         self.kernel_size=kernel_size
 8 |         self.strides=strides if strides!=None else kernel_size
 9 |         self.padding=padding
10 |         if not isinstance(padding,str):
11 |             self.zeropadding3d=nn.zeropadding3d(padding=padding)
12 |     
13 |     
14 |     def __call__(self,data):
15 |         if not isinstance(self.padding,str):
16 |             data=self.zeropadding3d(data)
17 |             padding='VALID'
18 |         else:
19 |             padding=self.padding
20 |         return tf.nn.max_pool3d(data,ksize=self.kernel_size,strides=self.strides,padding=padding)
21 | 


--------------------------------------------------------------------------------
/Note/nn/layer/maximum.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | class maximum:
 5 |     def __init__(self):
 6 |         self.save_data_count=None
 7 |         
 8 |         
 9 |     def __call__(self,data):
10 |         if self.save_data_count!=None:
11 |             output=data.pop(0)
12 |             for i in range(1,self.save_data_count):
13 |                 output=tf.maximum(output,data.pop(0))
14 |         else:
15 |             output=data[0]
16 |             for i in range(1,len(data)):
17 |                 output=tf.maximum(output,data[i])
18 |         return output


--------------------------------------------------------------------------------
/Note/nn/layer/maxout.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | class maxout:
 5 |     """Applies Maxout to the input.
 6 | 
 7 |     "Maxout Networks" Ian J. Goodfellow, David Warde-Farley, Mehdi Mirza, Aaron
 8 |     Courville, Yoshua Bengio. https://arxiv.org/abs/1302.4389
 9 | 
10 |     Usually the operation is performed in the filter/channel dimension. This
11 |     can also be used after Dense layers to reduce number of features.
12 | 
13 |     Args:
14 |       input_shape: Shape of the input tensor.
15 |       num_units: Specifies how many features will remain after maxout
16 |         in the `axis` dimension (usually channel).
17 |         This must be a factor of number of features.
18 |       axis: The dimension where max pooling will be performed. Default is the
19 |         last dimension.
20 | 
21 |     Input shape:
22 |       nD tensor with shape: `(batch_size, ..., axis_dim, ...)`.
23 | 
24 |     Output shape:
25 |       nD tensor with shape: `(batch_size, ..., num_units, ...)`.
26 |     """
27 | 
28 |     def __init__(self, num_units: int, axis: int = -1, input_shape=None):
29 |         self.num_units = num_units
30 |         self.axis = axis
31 |         self.input_shape=input_shape
32 |         if input_shape is not None:
33 |             self.num_channels = self.input_shape[axis]
34 |             if not isinstance(self.num_channels, tf.Tensor) and self.num_channels % self.num_units:
35 |                 raise ValueError(
36 |                     "number of features({}) is not "
37 |                     "a multiple of num_units({})".format(self.num_channels, self.num_units)
38 |                 )
39 |     
40 |             if axis < 0:
41 |                 self.axis_ = axis + len(self.input_shape)
42 |             else:
43 |                 self.axis_ = axis
44 |             assert self.axis_ >= 0, "Find invalid axis: {}".format(self.axis)
45 | 
46 | 
47 |     def __call__(self,data):
48 |         if self.input_shape is None:
49 |             self.input_shape=list(data.shape)
50 |             num_channels = self.input_shape[self.axis]
51 |             if not isinstance(num_channels, tf.Tensor) and num_channels % self.num_units:
52 |                 raise ValueError(
53 |                     "number of features({}) is not "
54 |                     "a multiple of num_units({})".format(num_channels, self.num_units)
55 |                 )
56 |     
57 |             if self.axis < 0:
58 |                 axis = self.axis + len(self.input_shape)
59 |             else:
60 |                 axis = self.axis
61 |             assert axis >= 0, "Find invalid axis: {}".format(self.axis)
62 |         else:
63 |             axis=self.axis_
64 |             num_channels=self.num_channels
65 |             
66 |         expand_shape = self.input_shape[:]
67 |         expand_shape[axis] = self.num_units
68 |         k = num_channels // self.num_units
69 |         expand_shape.insert(axis, k)
70 | 
71 |         output = tf.math.reduce_max(
72 |             tf.reshape(data, expand_shape), axis, keepdims=False
73 |         )
74 |         return output


--------------------------------------------------------------------------------
/Note/nn/layer/minimum.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | class minimum:
 5 |     def __init__(self):
 6 |         self.save_data_count=None
 7 |         
 8 |         
 9 |     def __call__(self,data):
10 |         if self.save_data_count!=None:
11 |             output=data.pop(0)
12 |             for i in range(1,self.save_data_count):
13 |                 output=tf.minimum(output,data.pop(0))
14 |         else:
15 |             output=data[0]
16 |             for i in range(1,len(data)):
17 |                 output=tf.minimum(output,data[i])
18 |         return output


--------------------------------------------------------------------------------
/Note/nn/layer/multiply.py:
--------------------------------------------------------------------------------
 1 | class multiply:
 2 |     def __init__(self):
 3 |         self.save_data_count=None
 4 |         
 5 |         
 6 |     def __call__(self,data):
 7 |         if self.save_data_count!=None:
 8 |             output=data.pop(0)
 9 |             for i in range(1,self.save_data_count):
10 |                 output=output*data.pop(0)
11 |         else:
12 |             output=data[0]
13 |             for i in range(1,len(data)):
14 |                 output=output*data[i]
15 |         return output


--------------------------------------------------------------------------------
/Note/nn/layer/patch_dropout.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note import nn
 3 | 
 4 | 
 5 | class PatchDropout:
 6 |     """
 7 |     https://arxiv.org/abs/2212.00794 and https://arxiv.org/pdf/2208.07220
 8 |     """
 9 | 
10 |     def __init__(
11 |             self,
12 |             prob: float = 0.5,
13 |             num_prefix_tokens: int = 1,
14 |             ordered: bool = False,
15 |             return_indices: bool = False,
16 |     ):
17 |         assert 0 <= prob < 1.
18 |         self.prob = prob
19 |         self.num_prefix_tokens = num_prefix_tokens  # exclude CLS token (or other prefix tokens)
20 |         self.ordered = ordered
21 |         self.return_indices = return_indices
22 |         self.train_flag=True
23 |         nn.Model.layer_list.append(self)
24 |         if nn.Model.name!=None and nn.Model.name not in nn.Model.layer_eval:
25 |             nn.Model.layer_eval[nn.Model.name]=[]
26 |             nn.Model.layer_eval[nn.Model.name].append(self)
27 |         elif nn.Model.name!=None:
28 |             nn.Model.layer_eval[nn.Model.name].append(self)
29 | 
30 |     def __call__(self, x, training=None):
31 |         if training==None:
32 |             training=self.train_flag
33 |         if not training or self.prob == 0.:
34 |             if self.return_indices:
35 |                 return x, None
36 |             return x
37 | 
38 |         if self.num_prefix_tokens:
39 |             prefix_tokens, x = x[:, :self.num_prefix_tokens], x[:, self.num_prefix_tokens:]
40 |         else:
41 |             prefix_tokens = None
42 | 
43 |         B = x.shape[0]
44 |         L = x.shape[1]
45 |         num_keep = max(1, int(L * (1. - self.prob)))
46 |         keep_indices = tf.argsort(tf.random.normal((B, L)), axis=-1)[:, :num_keep]
47 |         if self.ordered:
48 |             # NOTE does not need to maintain patch order in typical transformer use,
49 |             # but possibly useful for debug / visualization
50 |             keep_indices = tf.sort(keep_indices, axis=-1)
51 |         x = tf.gather(x, keep_indices, axis=1, batch_dims=1)
52 | 
53 |         if prefix_tokens is not None:
54 |             x = tf.concat((prefix_tokens, x), axis=1)
55 | 
56 |         if self.return_indices:
57 |             return x, keep_indices
58 |         return x
59 | 


--------------------------------------------------------------------------------
/Note/nn/layer/permute.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | class permute:
 5 |     """Permutes the dimensions of the input according to a given pattern.
 6 | 
 7 |     Useful e.g. connecting RNNs and convnets.
 8 | 
 9 |     Args:
10 |       dims: Tuple of integers. Permutation pattern does not include the
11 |         samples dimension. Indexing starts at 1.
12 |         For instance, `(2, 1)` permutes the first and second dimensions
13 |         of the input.
14 | 
15 |     Input shape:
16 |       Arbitrary. Use the keyword argument `input_shape`
17 |       (tuple of integers, does not include the samples axis)
18 |       when using this layer as the first layer in a model.
19 | 
20 |     Output shape:
21 |       Same as the input shape, but with the dimensions re-ordered according
22 |       to the specified pattern.
23 |     """
24 | 
25 |     def __init__(self, dims):
26 |         self.dims = tuple(dims)
27 |         if sorted(dims) != list(range(1, len(dims) + 1)):
28 |             raise ValueError(
29 |                 "Invalid permutation argument `dims` for Permute Layer. "
30 |                 "The set of indices in `dims` must be consecutive and start "
31 |                 f"from 1. Received dims={dims}"
32 |             )
33 | 
34 | 
35 |     def __call__(self, data):
36 |         return tf.transpose(data, perm=(0,) + self.dims)


--------------------------------------------------------------------------------
/Note/nn/layer/pos_embed.py:
--------------------------------------------------------------------------------
 1 | """ Position Embedding Utilities
 2 | 
 3 | Hacked together by / Copyright 2024 NoteDance
 4 | """
 5 | import math
 6 | from typing import List, Optional
 7 | 
 8 | import tensorflow as tf
 9 | from Note import nn
10 | 
11 | 
def resample_abs_pos_embed(
        posemb,
        new_size: List[int],
        old_size: Optional[List[int]] = None,
        num_prefix_tokens: int = 1,
        interpolation: str = 'bicubic',
        antialias: bool = True,
        verbose: bool = False,
):
    """Resize a flattened absolute position embedding to a new grid size.

    Args:
      posemb: Embedding indexed as `(1, tokens, dim)`, with the prefix tokens
        first.
      new_size: Target grid size `[height, width]`.
      old_size: Current grid size; assumed square when omitted.
      num_prefix_tokens: Number of leading (class, etc.) tokens that are left
        out of the interpolation.
      interpolation: Mode forwarded to `nn.interpolate`.
      antialias: Forwarded to `nn.interpolate`.
      verbose: Unused.

    Returns:
      `posemb` itself when the token count already matches and the target
      grid is square; otherwise the resized embedding in the original dtype.
    """
    token_count = posemb.shape[1]
    target_count = new_size[0] * new_size[1] + num_prefix_tokens
    if target_count == token_count and new_size[0] == new_size[1]:
        return posemb

    if old_size is None:
        # Without an explicit size the existing grid is taken to be square.
        side = int(math.sqrt(token_count - num_prefix_tokens))
        old_size = side, side

    posemb_prefix = None
    if num_prefix_tokens:
        posemb_prefix = posemb[:, :num_prefix_tokens]
        posemb = posemb[:, num_prefix_tokens:]

    embed_dim = posemb.shape[-1]
    orig_dtype = posemb.dtype
    # Interpolation is done in float32 and cast back afterwards.
    grid = tf.cast(posemb, 'float32')
    grid = tf.reshape(grid, (1, old_size[0], old_size[1], -1))
    # NOTE(review): the grid is moved to channels-first before calling
    # `nn.interpolate` here, while `resample_abs_pos_embed_nhwc` passes a
    # channels-last tensor - confirm which layout `nn.interpolate` expects.
    grid = tf.transpose(grid, (0, 3, 1, 2))
    grid = nn.interpolate(grid, size=new_size, mode=interpolation, antialias=antialias)
    grid = tf.transpose(grid, (0, 2, 3, 1))
    resized = tf.cast(tf.reshape(grid, (1, -1, embed_dim)), orig_dtype)

    # Put the untouched prefix tokens back in front.
    if posemb_prefix is not None:
        resized = tf.concat([posemb_prefix, resized], axis=1)

    return resized
50 | 
51 | 
def resample_abs_pos_embed_nhwc(
        posemb,
        new_size: List[int],
        interpolation: str = 'bicubic',
        antialias: bool = True,
        verbose: bool = False,
):
    """Resize a grid-shaped position embedding to `new_size`.

    Args:
      posemb: Embedding whose last three dimensions are
        `(height, width, dim)`.
      new_size: Target `[height, width]`.
      interpolation: Mode forwarded to `nn.interpolate`.
      antialias: Forwarded to `nn.interpolate`.
      verbose: Unused.

    Returns:
      `posemb` itself when the grid already has the requested size; otherwise
      the resized embedding (with a leading dimension of 1) in the original
      dtype.
    """
    height, width = posemb.shape[-3], posemb.shape[-2]
    if new_size[0] == height and new_size[1] == width:
        return posemb

    orig_dtype = posemb.dtype
    # Interpolation is done in float32 and cast back afterwards.
    grid = tf.cast(posemb, 'float32')
    grid = tf.reshape(grid, (1, grid.shape[-3], grid.shape[-2], grid.shape[-1]))
    grid = nn.interpolate(grid, size=new_size, mode=interpolation, antialias=antialias)
    return tf.cast(grid, orig_dtype)
69 | 


--------------------------------------------------------------------------------
/Note/nn/layer/position_embedding.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note import nn
 3 | 
 4 | 
 5 | class position_embedding:
 6 |   """Creates a positional embedding.
 7 | 
 8 |   Args:
 9 |     max_length: The maximum size of the dynamic sequence.
10 |     initializer: The initializer to use for the embedding weights. Defaults to
11 |       "glorot_uniform".
12 |     seq_axis: The axis of the input tensor where we add the embeddings.
13 | 
14 |   Reference: This layer creates a positional embedding as described in
15 |   [BERT: Pre-training of Deep Bidirectional Transformers for Language
16 |   Understanding](https://arxiv.org/abs/1810.04805).
17 |   """
18 | 
19 |   def __init__(self,
20 |                max_length,
21 |                input_size=None,
22 |                initializer="Xavier",
23 |                seq_axis=1,
24 |                dtype='float32'
25 |                ):
26 | 
27 |     if max_length is None:
28 |       raise ValueError(
29 |           "`max_length` must be an Integer, not `None`."
30 |       )
31 |     self.max_length = max_length
32 |     self.input_size = input_size
33 |     self.initializer = initializer
34 |     self._seq_axis = seq_axis
35 |     self.dtype = dtype
36 |     if input_size is not None:
37 |         self._position_embeddings = nn.initializer([max_length, input_size], initializer, dtype)
38 |         self.param=[self._position_embeddings]
39 | 
40 |   
41 |   def build(self):
42 |       self._position_embeddings = nn.initializer([self.max_length, self.input_size], self.initializer, self.dtype)
43 |       self.param=[self._position_embeddings]
44 |       return
45 | 
46 | 
47 |   def __call__(self, data):
48 |     input_shape = tf.shape(data)
49 |     actual_seq_len = input_shape[self._seq_axis]
50 |     position_embeddings = self._position_embeddings[:actual_seq_len, :]
51 |     new_shape = [1 for _ in data.get_shape().as_list()]
52 |     new_shape[self._seq_axis] = actual_seq_len
53 |     new_shape[-1] = position_embeddings.get_shape().as_list()[-1]
54 |     position_embeddings = tf.reshape(position_embeddings, new_shape)
55 |     return tf.broadcast_to(position_embeddings, input_shape)


--------------------------------------------------------------------------------
/Note/nn/layer/repeat_vector.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | def repeat(x, n):
 5 |     """Repeats a 2D tensor.
 6 | 
 7 |     if `x` has shape (samples, dim) and `n` is `2`,
 8 |     the output will have shape `(samples, 2, dim)`.
 9 | 
10 |     Args:
11 |         x: Tensor or variable.
12 |         n: Python integer, number of times to repeat.
13 | 
14 |     Returns:
15 |         A tensor.
16 | 
17 |     Example:
18 | 
19 |         >>> b = tf.constant([[1, 2], [3, 4]])
20 |         >>> b
21 |         <tf.Tensor: shape=(2, 2), dtype=int32, numpy=
22 |         array([[1, 2],
23 |                [3, 4]], dtype=int32)>
24 |         >>> tf.keras.backend.repeat(b, n=2)
25 |         <tf.Tensor: shape=(2, 2, 2), dtype=int32, numpy=
26 |         array([[[1, 2],
27 |                 [1, 2]],
28 |                [[3, 4],
29 |                 [3, 4]]], dtype=int32)>
30 | 
31 |     """
32 |     x = tf.expand_dims(x, 1)
33 |     pattern = tf.stack([1, n, 1])
34 |     return tf.tile(x, pattern)
35 | 
36 | 
class repeat_vector:
    """Repeats the input n times.

    Args:
      n: Integer, repetition factor.
    Input shape: 2D tensor of shape `(num_samples, features)`.
    Output shape: 3D tensor of shape `(num_samples, n, features)`.

    Raises:
      TypeError: If `n` is not an integer.
    """

    def __init__(self, n):
        self.n = n
        if isinstance(n, int):
            return
        raise TypeError(
            f"Expected an integer value for `n`, got {type(n)}."
        )

    def __call__(self, data):
        """Return `data` repeated `self.n` times along a new axis 1."""
        return repeat(data, self.n)


--------------------------------------------------------------------------------
/Note/nn/layer/reversible_residual.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | class reversible_residual:
 5 |     def __init__(self, f, g):
 6 |         self.f=f
 7 |         self.g=g
 8 |     
 9 |     
10 |     def __call__(self, data):
11 |         data1, data2 = tf.split(data, 2, axis=-1) # split the input into two halves
12 |         output1 = data1 + self.f(data2) # compute the first output half
13 |         output2 = data2 + self.g(output1) # compute the second output half
14 |         output = tf.concat([output1, output2], axis=-1) # concatenate the output halves
15 |         return output


--------------------------------------------------------------------------------
/Note/nn/layer/select_topk.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | class select_topk:
 5 |   """Select top-k + random-k tokens according to importance."""
 6 | 
 7 |   def __init__(self,
 8 |                top_k=None,
 9 |                random_k=None,
10 |                ):
11 |     self._top_k = top_k
12 |     self._random_k = random_k
13 | 
14 | 
15 |   def __call__(self, data):
16 |     if self._random_k is None:
17 |       # Pure top-k, not randomness.
18 |       pos = tf.argsort(data, direction="DESCENDING")
19 |       selected = tf.slice(pos, [0, 0], [-1, self._top_k])
20 |       not_selected = tf.slice(pos, [0, self._top_k], [-1, -1])
21 |     elif self._top_k is None:
22 |       # Pure randomness, no top-k.
23 |       pos = tf.argsort(tf.random.uniform(shape=tf.shape(data)),
24 |                        direction="DESCENDING")
25 |       selected = tf.slice(pos, [0, 0], [-1, self._random_k])
26 |       not_selected = tf.slice(pos, [0, self._random_k], [-1, -1])
27 |     else:
28 |       # Top-k plus randomness.
29 |       pos = tf.argsort(data, direction="DESCENDING")
30 |       selected_top_k = tf.slice(pos, [0, 0], [-1, self._top_k])
31 |       pos_left = tf.slice(pos, [0, self._top_k], [-1, -1])
32 | 
33 |       # Randomly shuffle pos_left
34 |       sort_index = tf.argsort(
35 |           tf.random.uniform(shape=tf.shape(pos_left)),
36 |           direction="DESCENDING")
37 |       pos_left = tf.gather(pos_left, sort_index, batch_dims=1, axis=1)
38 | 
39 |       selected_rand = tf.slice(pos_left, [0, 0], [-1, self._random_k])
40 |       not_selected = tf.slice(pos_left, [0, self._random_k], [-1, -1])
41 | 
42 |       selected = tf.concat([selected_top_k, selected_rand], axis=1)
43 | 
44 |     # Return the indices of selected and not-selected tokens.
45 |     return selected, not_selected


--------------------------------------------------------------------------------
/Note/nn/layer/self_attention_mask.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from typing import Optional
 3 | 
 4 | 
 5 | class self_attention_mask:
 6 |   """Create 3D attention mask from a 2D tensor mask.
 7 | 
 8 |     inputs[0]: from_tensor: 2D or 3D Tensor of shape
 9 |       [batch_size, from_seq_length, ...].
10 |     inputs[1]: to_mask: int32 Tensor of shape [batch_size, to_seq_length].
11 | 
12 |     Returns:
13 |       float Tensor of shape [batch_size, from_seq_length, to_seq_length].
14 |   """
15 | 
16 |   def __call__(self, inputs, to_mask=None):
17 |     if isinstance(inputs, list) and to_mask is None:
18 |       to_mask = inputs[1]
19 |       inputs = inputs[0]
20 |     return get_mask(inputs, to_mask)
21 | 
22 | 
def get_mask(inputs: tf.Tensor,
             to_mask: tf.Tensor,
             dtype: Optional[tf.DType] = None) -> tf.Tensor:
  """Gets a 3D self-attention mask.

  Args:
    inputs: from_tensor: 2D or 3D Tensor of shape [batch_size, from_seq_length,
      ...].
    to_mask: int32 Tensor of shape [batch_size, to_seq_length].
    dtype: the output Tensor dtype; defaults to the dtype of `inputs`.

  Returns:
    float Tensor of shape [batch_size, from_seq_length, to_seq_length].
  """
  from_shape = tf.shape(inputs)
  batch_size, from_seq_length = from_shape[0], from_shape[1]
  if dtype is None:
    dtype = inputs.dtype

  to_seq_length = tf.shape(to_mask)[1]

  # Insert a singleton "from" axis so the mask can be repeated for every
  # query position.
  row_mask = tf.reshape(to_mask, [batch_size, 1, to_seq_length])
  row_mask = tf.cast(row_mask, dtype=dtype)

  return tf.broadcast_to(
      row_mask, [batch_size, from_seq_length, to_seq_length])


--------------------------------------------------------------------------------
/Note/nn/layer/softmax.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | def _large_compatible_negative(tensor_type):
 5 |     """Large negative number as Tensor.
 6 | 
 7 |     This function is necessary because the standard value for epsilon
 8 |     in this module (-1e9) cannot be represented using tf.float16
 9 | 
10 |     Args:
11 |         tensor_type: a dtype to determine the type.
12 | 
13 |     Returns:
14 |         a large negative number.
15 |     """
16 |     # In case of dtype=float16 (e.g., for mixed-precision), the largest
17 |     # negative number (dtypes.float16.min) is divided by 2, in order to
18 |     # avoid overflows when summing negative inputs.
19 |     if tensor_type == tf.float16:
20 |         return tf.float16.min / 2.0
21 |     return -1e9
22 | 
23 | 
class softmax:
    """Softmax activation function.

    Output shape:
        Same shape as the input.

    Args:
        axis: Integer, or list of Integers, axis along which the softmax
            normalization is applied.
    Call arguments:
        inputs: The inputs, or logits to the softmax layer.
        mask: A boolean mask of the same shape as `inputs`. The mask
            specifies 1 to keep and 0 to mask. Defaults to `None`.

    Returns:
        Softmaxed output with the same shape as `inputs`.
    """

    def __init__(self, axis=-1):
        self.axis = axis


    def __call__(self, inputs, mask=None):
        if mask is not None:
            # Kept positions get 0.0 added and masked positions a large
            # negative number, which removes them from the softmax in effect.
            keep = tf.cast(mask, inputs.dtype)
            inputs += (1.0 - keep) * (
                _large_compatible_negative(inputs.dtype)
            )

        axis = self.axis
        if not isinstance(axis, (tuple, list)):
            return tf.nn.softmax(inputs, axis=axis)
        if len(axis) > 1:
            # Softmax over several axes, expressed through log-sum-exp.
            log_norm = tf.reduce_logsumexp(inputs, axis=axis, keepdims=True)
            return tf.exp(inputs - log_norm)
        return tf.nn.softmax(inputs, axis=axis[0])


--------------------------------------------------------------------------------
/Note/nn/layer/space_to_depth.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | class SpaceToDepth:
 5 |     def __init__(self, block_size=4):
 6 |         assert block_size == 4
 7 |         self.bs = block_size
 8 | 
 9 |     def __call__(self, x):
10 |         N, H, W, C = x.shape
11 |         x = tf.reshape(x, (N, H // self.bs, self.bs, W // self.bs, self.bs, C))  # (N, H//bs, bs, W//bs, bs, C)
12 |         x = tf.transpose(x, (0, 1, 3, 2, 4, 5))  # (N, bs, bs, C, H//bs, W//bs)
13 |         x = tf.reshape(x, (N, H // self.bs, W // self.bs, C * self.bs * self.bs))  # (N, H//bs, W//bs, C*bs^2)
14 |         return x
15 | 
16 | 
17 | class DepthToSpace:
18 | 
19 |     def __init__(self, block_size):
20 |         self.bs = block_size
21 | 
22 |     def __call__(self, x):
23 |         N, H, W, C = x.shape
24 |         x = tf.reshape(x, (N, H, W, self.bs, self.bs, C // (self.bs ** 2)))  # (N, H, W, bs, bs, C//bs^2)
25 |         x = tf.transpose(x, (0, 1, 3, 2, 4, 5))  # (N, H, bs, W, bs, C//bs^2)
26 |         x = tf.reshape(x, (N, H * self.bs, W * self.bs, C // (self.bs ** 2)))  # (N, H * bs, W * bs, C//bs^2)
27 |         return x


--------------------------------------------------------------------------------
/Note/nn/layer/spatial_dropout1d.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note import nn
 3 | 
 4 | 
 5 | class spatial_dropout1d:
 6 |     """Spatial 1D dropout layer.
 7 | 
 8 |     This layer randomly sets 1D feature maps along the last dimension to zero with a
 9 |     frequency of `rate` at each step during training time in order to prevent overfitting.
10 |     Inputs not set to zero are scaled up by 1/(1 - rate) such that the sum over all inputs
11 |     is unchanged.
12 | 
13 |     Arguments:
14 |         rate: Float between 0 and 1. Fraction of the input units to drop.
15 |         seed: A Python integer to use as random seed.
16 | 
17 |     Call arguments:
18 |         daat: A 3D tensor.
19 |         train_flag: A Python boolean indicating whether to apply dropout to the inputs or not. 
20 |             If True, the layer will randomly set 1D feature maps to zero with a frequency of rate. 
21 |             If False, the layer will return the inputs unchanged.
22 | 
23 |     References:
24 |         - Efficient Object Localization Using Convolutional Networks
25 |     """
26 | 
27 |     def __init__(self, rate, seed=7):
28 |         self.rate = rate
29 |         self.seed = seed
30 |         self.train_flag = True
31 |         nn.Model.layer_list.append(self)
32 |         if nn.Model.name!=None and nn.Model.name not in nn.Model.layer_eval:
33 |             nn.Model.layer_eval[nn.Model.name]=[]
34 |             nn.Model.layer_eval[nn.Model.name].append(self)
35 |         elif nn.Model.name!=None:
36 |             nn.Model.layer_eval[nn.Model.name].append(self)
37 | 
38 |     def __call__(self, data, training=None):
39 |         if training==None:
40 |             training=self.train_flag
41 |         def dropped_inputs():
42 |             # Generate a mask with shape (batch_size, 1, channels)
43 |             noise_shape = (tf.shape(data)[0], 1, tf.shape(data)[2])
44 |             mask = tf.random.stateless_binomial(noise_shape, seed=[self.seed, 0], counts=1, probs=(1 - self.rate), 
45 |                                                 output_dtype=data.dtype)
46 |             # Scale up the input by 1/(1 - rate) and apply the mask
47 |             return data * mask * (1.0 / (1.0 - self.rate))
48 | 
49 |         return tf.cond(training, dropped_inputs, lambda: data)
50 | 


--------------------------------------------------------------------------------
/Note/nn/layer/spatial_dropout2d.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note import nn
 3 | 
 4 | class spatial_dropout2d:
 5 |     """Spatial 2D dropout layer.
 6 | 
 7 |     This layer randomly sets 2D feature maps along the last two dimensions to zero with a
 8 |     frequency of `rate` at each step during training time in order to prevent overfitting.
 9 |     Inputs not set to zero are scaled up by 1/(1 - rate) such that the sum over all inputs
10 |     is unchanged.
11 | 
12 |     Arguments:
13 |         rate: Float between 0 and 1. Fraction of the input units to drop.
14 |         seed: A Python integer to use as random seed.
15 | 
16 |     Call arguments:
17 |         data: A 4D tensor.
18 |         train_flag: A Python boolean indicating whether to apply dropout to the inputs or not. 
19 |             If True, the layer will randomly set 2D feature maps to zero with a frequency of rate. 
20 |             If False, the layer will return the inputs unchanged.
21 | 
22 |     References:
23 |         - Efficient Object Localization Using Convolutional Networks
24 |     """
25 | 
26 |     def __init__(self, rate, seed=7):
27 |         self.rate = rate
28 |         self.seed = seed
29 |         self.train_flag = True
30 |         nn.Model.layer_list.append(self)
31 |         if nn.Model.name!=None and nn.Model.name not in nn.Model.layer_eval:
32 |             nn.Model.layer_eval[nn.Model.name]=[]
33 |             nn.Model.layer_eval[nn.Model.name].append(self)
34 |         elif nn.Model.name!=None:
35 |             nn.Model.layer_eval[nn.Model.name].append(self)
36 | 
37 |     def __call__(self, data, training=None):
38 |         if training==None:
39 |             training=self.train_flag
40 |         def dropped_inputs():
41 |             # Generate a mask with shape (batch_size, 1, 1, channels)
42 |             noise_shape = (tf.shape(data)[0], 1, 1, tf.shape(data)[3])
43 |             mask = tf.random.stateless_binomial(noise_shape, seed=[self.seed, 0], counts=1, probs=(1 - self.rate), 
44 |                                                 output_dtype=data.dtype)
45 |             # Scale up the input by 1/(1 - rate) and apply the mask
46 |             return data * mask * (1.0 / (1.0 - self.rate))
47 | 
48 |         return tf.cond(training, dropped_inputs, lambda: data)
49 | 


--------------------------------------------------------------------------------
/Note/nn/layer/spatial_dropout3d.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note import nn
 3 | 
 4 | 
 5 | class spatial_dropout3d:
 6 |     """Spatial 3D dropout layer.
 7 | 
 8 |     This layer randomly sets 3D feature maps along the last three dimensions to zero with a
 9 |     frequency of `rate` at each step during training time in order to prevent overfitting.
10 |     Inputs not set to zero are scaled up by 1/(1 - rate) such that the sum over all inputs
11 |     is unchanged.
12 | 
13 |     Arguments:
14 |         rate: Float between 0 and 1. Fraction of the input units to drop.
15 |         seed: A Python integer to use as random seed.
16 | 
17 |     Call arguments:
18 |         data: A 5D tensor.
19 |         train_flag: A Python boolean indicating whether to apply dropout to the inputs or not. 
20 |             If True, the layer will randomly set 3D feature maps to zero with a frequency of rate. 
21 |             If False, the layer will return the inputs unchanged.
22 | 
23 |     References:
24 |         - Efficient Object Localization Using Convolutional Networks
25 |     """
26 | 
27 |     def __init__(self, rate, seed=None):
28 |         self.rate = rate
29 |         self.seed = seed
30 |         self.train_flag = True
31 |         nn.Model.layer_list.append(self)
32 |         if nn.Model.name!=None and nn.Model.name not in nn.Model.layer_eval:
33 |             nn.Model.layer_eval[nn.Model.name]=[]
34 |             nn.Model.layer_eval[nn.Model.name].append(self)
35 |         elif nn.Model.name!=None:
36 |             nn.Model.layer_eval[nn.Model.name].append(self)
37 | 
38 |     def __call__(self, data, training=None):
39 |         if training==None:
40 |             training=self.train_flag
41 |         def dropped_inputs():
42 |             # Generate a mask with shape (batch_size, 1, 1, 1, channels)
43 |             noise_shape = (tf.shape(data)[0], 1, 1, 1, tf.shape(data)[4])
44 |             mask = tf.random.stateless_binomial(noise_shape, seed=[self.seed, 0], counts=1, probs=(1 - self.rate), 
45 |                                                 output_dtype=data.dtype)
46 |             # Scale up the input by 1/(1 - rate) and apply the mask
47 |             return data * mask * (1.0 / (1.0 - self.rate))
48 | 
49 |         return tf.cond(training, dropped_inputs, lambda: data)
50 | 


--------------------------------------------------------------------------------
/Note/nn/layer/stochastic_depth.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note import nn
 3 | 
 4 | 
 5 | class stochastic_depth:
 6 |     def __init__(self, drop_path_rate):
 7 |         self.drop_path_rate=drop_path_rate
 8 |         self.train_flag=True
 9 |         nn.Model.layer_list.append(self)
10 |         if nn.Model.name!=None and nn.Model.name not in nn.Model.layer_eval:
11 |             nn.Model.layer_eval[nn.Model.name]=[]
12 |             nn.Model.layer_eval[nn.Model.name].append(self)
13 |         elif nn.Model.name!=None:
14 |             nn.Model.layer_eval[nn.Model.name].append(self)
15 |     
16 |     
17 |     def __call__(self, x, training=None):
18 |         if training==None:
19 |             training=self.train_flag
20 |         if training:
21 |             keep_prob = 1 - self.drop_path_rate
22 |             shape = (tf.shape(x)[0],) + (1,) * (len(tf.shape(x)) - 1)
23 |             random_tensor = keep_prob + tf.random.uniform(shape, 0, 1, dtype=x.dtype)
24 |             random_tensor = tf.floor(random_tensor)
25 |             return (x / keep_prob) * random_tensor
26 |         return x
27 | 


--------------------------------------------------------------------------------
/Note/nn/layer/subtract.py:
--------------------------------------------------------------------------------
1 | class subtract:
2 |     def __call__(self,data1,data2):
3 |         return data1-data2


--------------------------------------------------------------------------------
/Note/nn/layer/thresholded_relu.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | class thresholded_relu:
 4 |     """DEPRECATED."""
 5 | 
 6 |     def __init__(self, theta=1.0, dtype='float32'):
 7 |         if theta is None:
 8 |             raise ValueError(
 9 |                 "Theta of a Thresholded ReLU layer cannot be None, expecting a "
10 |                 f"float. Received: {theta}"
11 |             )
12 |         if theta < 0:
13 |             raise ValueError(
14 |                 "The theta value of a Thresholded ReLU layer "
15 |                 f"should be >=0. Received: {theta}"
16 |             )
17 |         self.theta = tf.convert_to_tensor(theta, dtype=dtype)
18 |         self.dtype = dtype
19 | 
20 |     def __call__(self, data):
21 |         if data.dtype!=self.dtype:
22 |             data = tf.cast(data, self.dtype)
23 |         return data * tf.greater(data, self.theta)


--------------------------------------------------------------------------------
/Note/nn/layer/unfold.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note.nn.layer.zeropadding2d import zeropadding2d
 3 | 
 4 | 
 5 | class unfold:
 6 |     def __init__(self, kernel, stride=1, padding=0, dilation=1):
 7 |         self.kernel = kernel
 8 |         self.stride = stride
 9 |         self.padding = padding
10 |         self.dilation = dilation
11 |         self.zeropadding2d = zeropadding2d(padding=padding)
12 |     
13 |     def __call__(self, x):
14 |         x = self.zeropadding2d(x)
15 |         x = tf.image.extract_patches(x, sizes=[1, self.kernel, self.kernel, 1], strides=[1, self.stride, self.stride, 1], rates=[1, self.dilation, self.dilation, 1], padding='VALID')
16 |         x = tf.reshape(x, (x.shape[0], -1, x.shape[-1]))
17 |         return x
18 | 


--------------------------------------------------------------------------------
/Note/nn/layer/unit_norm.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | class unit_norm:
 5 |     """Unit normalization layer.
 6 | 
 7 |     Normalize a batch of inputs so that each input in the batch has a L2 norm
 8 |     equal to 1 (across the axes specified in `axis`).
 9 | 
10 |     Example:
11 | 
12 |     >>> data = tf.constant(np.arange(6).reshape(2, 3), dtype=tf.float32)
13 |     >>> normalized_data = Note.nn.layer.unit_normalization.unit_normalization().output(data)
14 |     >>> print(tf.reduce_sum(normalized_data[0, :] ** 2).numpy())
15 |     1.0
16 | 
17 |     Args:
18 |       axis: Integer or list/tuple. The axis or axes to normalize across.
19 |         Typically this is the features axis or axes. The left-out axes are
20 |         typically the batch axis or axes. Defaults to `-1`, the last dimension
21 |         in the input.
22 |     """
23 | 
24 |     def __init__(self, axis=-1):
25 |         if isinstance(axis, (list, tuple)):
26 |             self.axis = list(axis)
27 |         elif isinstance(axis, int):
28 |             self.axis = axis
29 |         else:
30 |             raise TypeError(
31 |                 "Invalid value for `axis` argument: "
32 |                 "expected an int or a list/tuple of ints. "
33 |                 f"Received: axis={axis}"
34 |             )
35 | 
36 | 
37 |     def __call__(self, inputs):
38 |         return tf.linalg.l2_normalize(inputs, axis=self.axis)


--------------------------------------------------------------------------------
/Note/nn/layer/up_sampling1d.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | class up_sampling1d:
 4 |     def __init__(self, size):
 5 |         self.size = size
 6 | 
 7 |     def __call__(self, inputs):
 8 |         # Repeat each time step size times along the temporal axis
 9 |         outputs = tf.repeat(inputs, self.size, axis=1)
10 |         return outputs


--------------------------------------------------------------------------------
/Note/nn/layer/up_sampling2d.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | class up_sampling2d:
 4 |     def __init__(self, size):
 5 |         # Convert size to a tuple if it is an integer
 6 |         if isinstance(size, int):
 7 |             size = (size, size)
 8 |         self.size = size
 9 | 
10 |     def __call__(self, inputs):
11 |         # Repeat each spatial dimension size times along the height and width axes
12 |         outputs = tf.repeat(inputs, self.size[0], axis=1)
13 |         outputs = tf.repeat(outputs, self.size[1], axis=2)
14 |         return outputs


--------------------------------------------------------------------------------
/Note/nn/layer/up_sampling3d.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | class up_sampling3d:
 4 |     def __init__(self, size):
 5 |         # Convert size to a tuple if it is an integer
 6 |         if isinstance(size, int):
 7 |             size = (size, size, size)
 8 |         self.size = size
 9 | 
10 |     def __call__(self, inputs):
11 |         # Repeat each spatial dimension size times along the depth, height and width axes
12 |         outputs = tf.repeat(inputs, self.size[0], axis=1)
13 |         outputs = tf.repeat(outputs, self.size[1], axis=2)
14 |         outputs = tf.repeat(outputs, self.size[2], axis=3)
15 |         return outputs


--------------------------------------------------------------------------------
/Note/nn/layer/vector_quantizer.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note.nn.initializer import initializer
 3 | 
 4 | class vector_quantizer:
 5 |   def __init__(
 6 |       self,
 7 |       embedding_dim: int,
 8 |       num_embeddings: int,
 9 |       commitment_cost: float,
10 |       dtype = 'float32',
11 |   ):
12 |     self.embedding_dim = embedding_dim
13 |     self.num_embeddings = num_embeddings
14 |     self.commitment_cost = commitment_cost
15 | 
16 |     self._embedding_shape = [embedding_dim, num_embeddings]
17 |     self._embedding_dtype = dtype
18 |     self.embeddings = initializer(self._embedding_shape, 
19 |                         ['VarianceScaling',1.0,'fan_in','uniform'], 
20 |                         dtype)
21 | 
22 |   def __call__(self, data, is_training):
23 |     flat_inputs = tf.reshape(data, [-1, self.embedding_dim])
24 | 
25 |     distances = (
26 |         tf.math.reduce_sum(tf.math.square(flat_inputs), 1, keepdims=True) -
27 |         2 * tf.matmul(flat_inputs, self.embeddings) +
28 |         tf.math.reduce_sum(tf.math.square(self.embeddings), 0, keepdims=True))
29 | 
30 |     encoding_indices = tf.math.argmax(-distances, 1)
31 |     encodings = tf.one_hot(encoding_indices,
32 |                                self.num_embeddings,
33 |                                dtype=distances.dtype)
34 | 
35 |     encoding_indices = tf.reshape(encoding_indices, data.shape[:-1])
36 |     quantized = self.quantize(encoding_indices)
37 | 
38 |     e_latent_loss = tf.math.reduce_mean(
39 |         tf.math.square(tf.stop_gradient(quantized) - data))
40 |     q_latent_loss = tf.math.reduce_mean(
41 |         tf.math.square(quantized - tf.stop_gradient(data)))
42 |     loss = q_latent_loss + self.commitment_cost * e_latent_loss
43 | 
44 |     quantized = data + tf.stop_gradient(quantized - data)
45 |     avg_probs = tf.math.reduce_mean(encodings, 0)
46 |     perplexity = tf.math.exp(-tf.math.reduce_sum(avg_probs * tf.math.log(avg_probs + 1e-10)))
47 | 
48 |     return {
49 |         "quantize": quantized,
50 |         "loss": loss,
51 |         "perplexity": perplexity,
52 |         "encodings": encodings,
53 |         "encoding_indices": encoding_indices,
54 |         "distances": distances,
55 |     }
56 | 
57 |   def quantize(self, encoding_indices):
58 |     w = tf.transpose(self.embeddings, [1, 0])
59 |     return w[(encoding_indices,)]


--------------------------------------------------------------------------------
/Note/nn/layer/zeropadding1d.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | def normalize_tuple(value, n, allow_zero=False):
 5 |     error_msg = (
 6 |         f"integers. Received: {value}"
 7 |     )
 8 | 
 9 |     if isinstance(value, int):
10 |         value_tuple = (value,) * n
11 |     else:
12 |         try:
13 |             value_tuple = tuple(value)
14 |         except TypeError:
15 |             raise ValueError(error_msg)
16 |         if len(value_tuple) != n:
17 |             raise ValueError(error_msg)
18 |         for single_value in value_tuple:
19 |             try:
20 |                 int(single_value)
21 |             except (ValueError, TypeError):
22 |                 error_msg += (
23 |                     f"including element {single_value} of "
24 |                     f"type {type(single_value)}"
25 |                 )
26 |                 raise ValueError(error_msg)
27 | 
28 |     if allow_zero:
29 |         unqualified_values = {v for v in value_tuple if v < 0}
30 |         req_msg = ">= 0"
31 |     else:
32 |         unqualified_values = {v for v in value_tuple if v <= 0}
33 |         req_msg = "> 0"
34 | 
35 |     if unqualified_values:
36 |         error_msg += (
37 |             f" including {unqualified_values}"
38 |             f" that does not satisfy the requirement `{req_msg}`."
39 |         )
40 |         raise ValueError(error_msg)
41 | 
42 |     return value_tuple
43 | 
44 | 
45 | class zeropadding1d:
46 |     def __init__(self,input_size=None, padding=None):
47 |         self.pattern = None
48 |         if padding is not None:
49 |             padding = normalize_tuple(
50 |                 padding, 2, allow_zero=True
51 |             )
52 |             self.pattern = [[0, 0], [padding[0], padding[1]], [0, 0]]
53 |         self.input_size=input_size
54 |         if input_size!=None:
55 |             self.output_size=input_size
56 |     
57 |     
58 |     def __call__(self, data, padding=1):
59 |         if self.pattern is None:
60 |             padding = normalize_tuple(
61 |                 padding, 2, allow_zero=True
62 |             )
63 |             pattern = [[0, 0], [padding[0], padding[1]], [0, 0]]
64 |         else:
65 |             pattern = self.pattern
66 |         return tf.pad(data, pattern)


--------------------------------------------------------------------------------
/Note/nn/nan_to_num.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | 
3 | 
4 | def nan_to_num(tensor, nan=0.0, out=None):
5 |     result = tf.where(tf.math.is_nan(tensor), tf.constant(nan, dtype=tensor.dtype), tensor)
6 |     if out is not None:
7 |         out.assign(result)
8 |         return out
9 |     return result


--------------------------------------------------------------------------------
/Note/nn/narrow.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | def narrow(tensor, dim, start, size):
 5 |     rank = tf.rank(tensor)
 6 |     shape = tf.shape(tensor)
 7 |     dim = tf.where(dim < 0, dim + rank, dim)
 8 |     before = tf.zeros([dim], dtype=tf.int32)
 9 |     after = tf.zeros([rank - dim - 1], dtype=tf.int32)
10 |     begin = tf.concat([before, tf.expand_dims(start, 0), after], axis=0)
11 |     one_hot = tf.one_hot(dim, rank, dtype=tf.int32)
12 |     size_for_tf_slice = (shape * (1 - one_hot)) + size * one_hot
13 |     return tf.slice(tensor, begin, size_for_tf_slice)
14 | 


--------------------------------------------------------------------------------
/Note/nn/optimizer/optimizer.py:
--------------------------------------------------------------------------------
 1 | from keras.src import backend
 2 | from keras.src.api_export import keras_export
 3 | from Note.nn.optimizers import base_optimizer
 4 | 
 5 | if backend.backend() == "tensorflow":  # pick the optimizer base class of the active Keras backend
 6 |     from keras.src.backend.tensorflow.optimizer import (
 7 |         TFOptimizer as BackendOptimizer,
 8 |     )
 9 | elif backend.backend() == "torch":
10 |     from keras.src.backend.torch.optimizers import (
11 |         TorchOptimizer as BackendOptimizer,
12 |     )
13 | elif backend.backend() == "jax":
14 |     from keras.src.backend.jax.optimizer import JaxOptimizer as BackendOptimizer
15 | else:
16 |     # Any other backend: BackendOptimizer adds nothing to BaseOptimizer.
17 |     class BackendOptimizer(base_optimizer.BaseOptimizer):
18 |         pass
19 | 
20 | 
21 | @keras_export(["keras.Optimizer", "keras.optimizers.Optimizer"])
22 | class Optimizer(BackendOptimizer, base_optimizer.BaseOptimizer):
23 |     pass  # no body of its own: everything is inherited from the two bases
24 | 
25 | # Reuse BaseOptimizer's docstring and expose its keyword-argument object here.
26 | Optimizer.__doc__ = base_optimizer.BaseOptimizer.__doc__
27 | base_optimizer_keyword_args = base_optimizer.base_optimizer_keyword_args


--------------------------------------------------------------------------------
/Note/nn/optimizer/orthograd.py:
--------------------------------------------------------------------------------
 1 | """ OrthoGrad
 2 | https://arxiv.org/abs/2501.04697
 3 | 
 4 | Copyright 2025 NoteDance
 5 | """
 6 | import tensorflow as tf
 7 | from keras.src.optimizers import optimizer
 8 | 
 9 | 
10 | class OrthoGrad(optimizer.Optimizer):
11 |     def __init__(
12 |         self,
13 |         base_optimizer=None,
14 |         name="orthograd",
15 |     ):
16 |         super().__init__(
17 |             learning_rate=1.,
18 |             name=name,
19 |         )
20 |         self.base_optimizer = base_optimizer
21 |     
22 |     def reset(self):
23 |         pass
24 | 
25 |     def build(self, var_list):
26 |         if self.built:
27 |             return
28 |         super().build(var_list)
29 |     
30 |     @staticmethod
31 |     def _orthogonalize_gradients(self, params, grads):
32 |         """
33 |         Projects the gradient g to be orthogonal to the current weights w.
34 | 
35 |         g_orth = g - ( (w·g)/(w·w + eps) ) * w
36 | 
37 |         And then re-scales g_orth to have the same norm as g.
38 |         """
39 |         for p, g in zip(params, grads):
40 |             w = tf.reshape(p, [-1])
41 |             g = tf.reshape(g, [-1])
42 | 
43 |             w_norm_sq = tf.tensordot(w, w, axes=1) + 1e-30
44 |             proj = tf.tensordot(w, g, axes=1) / w_norm_sq
45 |             g_orth = g - proj * w
46 | 
47 |             g_norm = tf.norm(g, ord=2)
48 |             g_orth_norm = tf.norm(g_orth, ord=2) + 1e-30
49 |             g_orth_scaled = g_orth * (g_norm / g_orth_norm)
50 | 
51 |             grads[self._get_variable_index(p)] = tf.reshape(g_orth_scaled, g.shape)
52 |     
53 |     def _backend_update_step(self, grads, trainable_variables, learning_rate):
54 |         """Collective update_step that can be overridden by the backend.
55 |     
56 |         It is overridden by torch for performance reasons, and
57 |         by TF to support tf.distribute.
58 |         """
59 |         self.update_step(grads, trainable_variables, learning_rate)
60 |     
61 |     def apply_gradients(self, grads_and_vars, tape=None):
62 |         self.tape = tape
63 |         grads, trainable_variables = zip(*grads_and_vars)
64 |         self.apply(grads, trainable_variables)
65 |         # Return iterations for compat with tf.keras.
66 |         return self._iterations
67 | 
68 |     def update_step(self, grads, trainable_variables, learning_rate):
69 |         self._orthogonalize_gradients(trainable_variables, grads)
70 |         if self.tape is None:
71 |             self.base_optimizer.apply_gradients(zip(grads, trainable_variables))
72 |         else:
73 |             self.base_optimizer.apply_gradients(zip(grads, trainable_variables), self.tape)
74 |         
75 |     def get_config(self):
76 |         config = super().get_config()
77 |         config.update(
78 |             {
79 |                 "base_optimizer": self.base_optimizer,
80 |             }
81 |         )
82 |         return config
83 | 	
84 |     def _apply_weight_decay(self, variables):
85 |         pass


--------------------------------------------------------------------------------
/Note/nn/pairwise_distance.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | 
3 | def pairwise_distance(x, y, p=2, eps=1e-6, keepdim=False):
4 |   diff = tf.math.subtract(x, y) + eps
5 |   norm = tf.math.reduce_sum(tf.math.abs(diff ** p), axis=-1) ** (1/p)
6 |   if keepdim==True:
7 |       norm = tf.expand_dims(norm, -1)
8 |   return norm


--------------------------------------------------------------------------------
/Note/nn/parallel/assign_device.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | def assign_device(p, device): # a function to assign device according to the process index p and the device type
 5 |     devices = tf.config.list_physical_devices(device) # get a list of available devices of the given type
 6 |     if devices: # if there are any devices of the given type
 7 |         try:
 8 |             tf.config.set_visible_devices(devices[p % len(devices)], device) # set the device with index p modulo the number of devices as visible
 9 |             device = '/' + device + ':' + str(p % len(devices)) # store the device name as an attribute
10 |         except RuntimeError as e: # catch any runtime error
11 |             raise e # raise the error message
12 |     else: # if there are no devices of the given type
13 |         device = '/CPU:0' # use CPU device as default
14 |     return device
15 | 


--------------------------------------------------------------------------------
/Note/nn/parallel/assign_device_pytorch.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | 
 4 | def assign_device(p, device): # a function to assign device according to the process index p and the device type
 5 |     if device == 'GPU': # if the device type is GPU
 6 |         if torch.cuda.is_available(): # if there are any available GPU devices
 7 |             try:
 8 |                 torch.cuda.set_device(p % torch.cuda.device_count()) # set the device with index p modulo the number of devices as current
 9 |                 device = torch.device('cuda', p % torch.cuda.device_count()) # create a torch.device object with the current device
10 |             except RuntimeError as e: # catch any runtime error
11 |                 raise e # raise the error message
12 |         else: # if there are no available GPU devices
13 |             device = torch.device('cpu') # use CPU device as default
14 |     elif device == 'CPU': # if the device type is CPU
15 |         device = torch.device('cpu') # use CPU device as default
16 |     else: # if the device type is neither GPU nor CPU
17 |         raise ValueError('Invalid device type') # raise a value error
18 |     return device
19 | 


--------------------------------------------------------------------------------
/Note/nn/parameter.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from Note.nn.Model import Model
 3 | 
 4 | def Parameter(data,trainable=True,name=None):
 5 |     param=tf.Variable(data,trainable=trainable)
 6 |     if name!=None:
 7 |         param=tf.Variable(param,name=name)
 8 |     Model.param.append(param)
 9 |     if Model.name!=None and Model.name not in Model.layer_param:
10 |         Model.layer_param[Model.name]=[]
11 |         Model.layer_param[Model.name].append(param)
12 |     elif Model.name_!=None:
13 |         Model.layer_param[Model.name].append(param)
14 |     return param
15 | 


--------------------------------------------------------------------------------
/Note/nn/pos_embed.py:
--------------------------------------------------------------------------------
 1 | """ Position Embedding Utilities
 2 | 
 3 | Hacked together by / Copyright 2024 NoteDance
 4 | """
 5 | import logging
 6 | import math
 7 | from typing import List, Optional
 8 | 
 9 | import tensorflow as tf
10 | from Note import nn
11 | 
12 | _logger = logging.getLogger(__name__)
13 | 
14 | 
def resample_abs_pos_embed(
        posemb,
        new_size: List[int],
        old_size: Optional[List[int]] = None,
        num_prefix_tokens: int = 1,
        interpolation: str = 'bicubic',
        antialias: bool = True,
        verbose: bool = False,
):
    """Resample a flattened absolute position embedding onto a new grid.

    Args:
        posemb: Embedding whose axis 1 indexes tokens; the first
            ``num_prefix_tokens`` entries on that axis are treated as prefix
            tokens and are passed through unchanged.
        new_size: Target grid as ``[height, width]``.
        old_size: Source grid; when ``None`` a square grid is derived from
            the number of non-prefix tokens.
        num_prefix_tokens: Number of leading tokens excluded from resizing.
        interpolation: Mode forwarded to ``nn.interpolate``.
        antialias: Antialias flag forwarded to ``nn.interpolate``.
        verbose: When true, log the old and new grid sizes.

    Returns:
        ``posemb`` itself when no resize is needed, otherwise the resampled
        embedding cast back to the input dtype.
    """
    total_tokens = posemb.shape[1]
    wanted_tokens = new_size[0] * new_size[1] + num_prefix_tokens
    # Nothing to do when the token count already matches a square target grid
    if wanted_tokens == total_tokens and new_size[0] == new_size[1]:
        return posemb

    if old_size is None:
        # No source grid supplied: assume the non-prefix tokens form a square
        side = int(math.sqrt(total_tokens - num_prefix_tokens))
        old_size = side, side

    # Split off the prefix tokens so only the grid part is interpolated
    prefix = None
    if num_prefix_tokens:
        prefix = posemb[:, :num_prefix_tokens]
        posemb = posemb[:, num_prefix_tokens:]

    source_dtype = posemb.dtype
    channels = posemb.shape[-1]
    # Interpolation runs in float32 on a (1, H, W, C) layout
    grid = tf.reshape(tf.cast(posemb, 'float32'), (1, old_size[0], old_size[1], -1))
    grid = nn.interpolate(grid, size=new_size, mode=interpolation, antialias=antialias)
    posemb = tf.cast(tf.reshape(grid, (1, -1, channels)), source_dtype)

    # Re-attach the untouched prefix tokens in front of the resized grid tokens
    if prefix is not None:
        posemb = tf.concat([prefix, posemb], axis=1)

    if verbose:
        _logger.info(f'Resized position embedding: {old_size} to {new_size}.')

    return posemb
56 | 
57 | 
def resample_abs_pos_embed_nhwc(
        posemb,
        new_size: List[int],
        interpolation: str = 'bicubic',
        antialias: bool = True,
        verbose: bool = False,
):
    """Resample a grid-shaped position embedding to ``new_size``.

    Args:
        posemb: Embedding whose axes ``-3`` and ``-2`` are the grid height
            and width.
        new_size: Target grid as ``[height, width]``.
        interpolation: Mode forwarded to ``nn.interpolate``.
        antialias: Antialias flag forwarded to ``nn.interpolate``.
        verbose: When true, log the old and new grid sizes.

    Returns:
        ``posemb`` itself when the grid already matches ``new_size``,
        otherwise the resampled embedding cast back to the input dtype.
    """
    if new_size[0] == posemb.shape[-3] and new_size[1] == posemb.shape[-2]:
        return posemb

    # Remember the source grid now; after interpolation the shape already
    # equals new_size, so reading it later would log the wrong "from" size.
    old_size = posemb.shape[-3:-1]

    orig_dtype = posemb.dtype
    posemb = tf.cast(posemb, 'float32')  # interpolate in float32
    posemb = nn.interpolate(posemb, size=new_size, mode=interpolation, antialias=antialias)
    posemb = tf.cast(posemb, orig_dtype)

    if verbose:
        _logger.info(f'Resized position embedding: {old_size} to {new_size}.')

    return posemb


--------------------------------------------------------------------------------
/Note/nn/positional_encoding.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | def positional_encoding(max_len,d_model):
 5 |     pos_enc=tf.zeros((max_len,d_model))
 6 |     angles=tf.zeros((max_len,d_model))
 7 |     pos=tf.range(max_len)[:,tf.newaxis]
 8 |     i=tf.range(d_model)[tf.newaxis,:]
 9 |     even_mask=i%2==0
10 |     odd_mask=~even_mask
11 |     angles=tf.where(even_mask,tf.math.sin(pos/(10000**(i/d_model))),angles)
12 |     angles=tf.where(odd_mask,tf.math.cos(pos/(10000**((i-1)/d_model))),angles)
13 |     pos_enc=angles
14 |     return pos_enc


--------------------------------------------------------------------------------
/Note/nn/restore.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import pickle
 3 |     
 4 | 
 5 | def restore(path):
 6 |     input_file=open(path,'rb')
 7 |     model=pickle.load(input_file)
 8 |     optimizer=tf.keras.optimizers.deserialize(pickle.load(input_file))
 9 |     input_file.close()
10 |     return model,optimizer
11 | 
12 | 
def restore_param(path):
    """Load a single pickled object (the parameters) from ``path``.

    Args:
        path: Path of the file to read.

    Returns:
        The unpickled object.
    """
    # NOTE: pickle can execute arbitrary code while loading; only use this
    # on files from a trusted source.
    # The with-block closes the file even if unpickling raises.
    with open(path,'rb') as input_file:
        return pickle.load(input_file)
18 | 


--------------------------------------------------------------------------------
/Note/nn/scaled_dot_product_attention.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import math
 3 | 
 4 | def scaled_dot_product_attention(query, key, value, attn_mask=None, dropout_p=0.0, is_causal=False, scale=None):
 5 |     L, S = query.shape[-2], key.shape[-2]
 6 |     scale_factor = 1 / math.sqrt(query.shape[-1]) if scale is None else scale
 7 |     attn_bias = tf.zeros((L, S), dtype=query.dtype)
 8 |     if is_causal:
 9 |         assert attn_mask is None
10 |         temp_mask = tf.linalg.band_part(tf.ones((L, S), dtype=tf.bool), -1, 0)
11 |         attn_bias = tf.where(temp_mask, attn_bias, float("-inf"))
12 |         attn_bias = tf.cast(attn_bias, query.dtype)
13 | 
14 |     if attn_mask is not None:
15 |         if attn_mask.dtype == tf.bool:
16 |             attn_bias = tf.where(attn_mask, attn_bias, float("-inf"))
17 |         else:
18 |             attn_bias += attn_mask
19 |     attn_weight = tf.matmul(query, tf.transpose(key, (0, 1, 3, 2))) * scale_factor
20 |     attn_weight += attn_bias
21 |     attn_weight = tf.nn.softmax(attn_weight, axis=-1)
22 |     attn_weight = tf.nn.dropout(attn_weight, dropout_p)
23 |     return tf.matmul(attn_weight, value)
24 | 


--------------------------------------------------------------------------------
/Note/nn/softplus.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | def softplus(x, beta=1.0, threshold=20.0):
 5 |     if beta != 1.0:
 6 |         x = x * beta
 7 |     x = tf.where(
 8 |         x > threshold,
 9 |         x,
10 |         tf.math.log(1 + tf.exp(x))
11 |     )
12 |     if beta != 1.0:
13 |         x = x / beta
14 |     return x


--------------------------------------------------------------------------------
/Note/nn/solve_triangular.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | def solve_triangular(A, B, *, upper, left=True, unitriangular=False):
 5 |     if unitriangular:
 6 |         diag_shape = tf.shape(tf.linalg.diag_part(A))
 7 |         ones = tf.ones(diag_shape, dtype=A.dtype)
 8 |         A = tf.linalg.set_diag(A, ones)
 9 |     
10 |     if left:
11 |         X = tf.linalg.triangular_solve(A, B, lower=not upper)
12 |     else:
13 |         X_T = tf.linalg.triangular_solve(tf.transpose(A), tf.transpose(B), lower=upper)
14 |         X = tf.transpose(X_T)
15 |     return X


--------------------------------------------------------------------------------
/Note/nn/sparse_mask.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | 
3 | 
def sparse_mask(dense_tensor, mask_sparse):
    """Pick the entries of ``dense_tensor`` located at ``mask_sparse.indices``.

    Args:
        dense_tensor: Tensor to gather values from.
        mask_sparse: Object providing ``indices`` and ``dense_shape``.

    Returns:
        A ``tf.sparse.SparseTensor`` with the indices and dense shape of
        ``mask_sparse`` and the values gathered from ``dense_tensor``.
    """
    return tf.sparse.SparseTensor(
        indices=mask_sparse.indices,
        values=tf.gather_nd(dense_tensor, mask_sparse.indices),
        dense_shape=mask_sparse.dense_shape,
    )


--------------------------------------------------------------------------------
/Note/sr.py:
--------------------------------------------------------------------------------
 1 | import pickle
 2 | 
 3 | 
 4 | def save(data,path):
 5 |     output_file=open(path,'wb')
 6 |     pickle.dump(data,output_file)
 7 |     output_file.close()
 8 |     return
 9 | 
10 | 
def restore(path):
    """Load and return the pickled object stored at ``path``.

    Args:
        path: Path of the file to read.

    Returns:
        The unpickled object.
    """
    # NOTE: pickle can execute arbitrary code while loading; only use this
    # on files from a trusted source.
    # The with-block closes the file even if unpickling raises.
    with open(path,'rb') as input_file:
        return pickle.load(input_file)
16 | 


--------------------------------------------------------------------------------
/Note/version.py:
--------------------------------------------------------------------------------
# Version string of the package
version = '7.0'
# Date string accompanying this version
date = '2023.7.7'
3 | 


--------------------------------------------------------------------------------