├── python ├── __init__.py ├── dummy_binary.py ├── riegeli │ ├── base │ │ ├── __init__.py │ │ └── BUILD │ ├── records │ │ ├── __init__.py │ │ ├── examples │ │ │ ├── __init__.py │ │ │ ├── BUILD │ │ │ └── write_read_records.py │ │ ├── tests │ │ │ ├── __init__.py │ │ │ ├── records_test.proto │ │ │ └── BUILD │ │ ├── records_metadata.proto │ │ ├── skipped_region.py │ │ └── record_position.h │ ├── tensorflow │ │ ├── __init__.py │ │ ├── ops │ │ │ └── __init__.py │ │ ├── kernel_tests │ │ │ └── __init__.py │ │ └── BUILD │ ├── BUILD │ ├── BUILD.tpl │ ├── py_extension.bzl │ ├── bytes │ │ └── BUILD │ └── __init__.py ├── MANIFEST.in ├── README.md ├── BUILD ├── build_pip_package.sh └── setup.py ├── tf_dependency ├── BUILD └── BUILD.tpl ├── riegeli ├── .gitignore ├── tensorflow │ ├── BUILD │ ├── ops │ │ └── riegeli_dataset_ops.cc │ └── io │ │ └── BUILD ├── BUILD ├── records │ ├── README.md │ ├── skipped_region.cc │ ├── records_metadata.proto │ └── tools │ │ ├── riegeli_summary.proto │ │ ├── tfrecord_recognizer.h │ │ └── tfrecord_recognizer.cc ├── chunk_encoding │ ├── README.md │ ├── hash.h │ ├── constants.h │ ├── decompressor.cc │ ├── brotli_encoder_selection.h │ ├── chunk_encoder.cc │ ├── hash.cc │ ├── chunk.cc │ └── brotli_encoder_selection.cc ├── bzip2 │ ├── bzip2_error.h │ ├── BUILD │ └── bzip2_error.cc ├── base │ ├── chain.h │ ├── external_ref.h │ ├── string_utils.h │ ├── external_data.cc │ ├── port.h │ ├── constexpr.h │ ├── buffer.cc │ ├── errno_mapping.h │ ├── string_utils.cc │ ├── shared_buffer.cc │ ├── status.cc │ ├── unicode.h │ ├── stream_utils.cc │ ├── closing_ptr.h │ ├── parallelism.h │ ├── ownership.h │ ├── assert.cc │ ├── estimated_allocated_size.h │ ├── external_data.h │ ├── types.h │ ├── memory_estimator.cc │ ├── parallelism.cc │ ├── cord_iterator_span.cc │ ├── type_id.h │ └── initializer_internal.h ├── xz │ ├── xz_error.h │ └── BUILD ├── varint │ ├── BUILD │ └── varint_internal.h ├── ordered_varint │ ├── BUILD │ └── ordered_varint_internal.h ├── bytes │ ├── 
cfile_internal.cc │ ├── cfile_internal.h │ ├── fd_internal.h │ ├── iostream_internal.h │ ├── fd_internal.cc │ ├── string_reader.cc │ ├── fd_internal_for_cc.h │ ├── array_backward_writer.cc │ └── std_io.cc ├── zlib │ ├── zlib_error.h │ ├── zlib_error.cc │ └── BUILD ├── brotli │ ├── brotli_allocator.cc │ ├── brotli_dictionary.cc │ └── BUILD ├── digests │ ├── sha256_digester.h │ ├── sha512_digester.h │ ├── sha512_256_digester.h │ ├── md5_digester.h │ ├── sha1_digester.h │ ├── adler32_digester.h │ ├── adler32_digester.cc │ ├── crc32_digester.cc │ ├── highwayhash_digester.cc │ ├── crc32_digester.h │ └── openssl_digester.h ├── endian │ └── BUILD ├── messages │ ├── map_entry_field.h │ └── message_wire_format.h ├── lines │ ├── newline.h │ └── BUILD ├── lz4 │ ├── lz4_dictionary.cc │ └── BUILD ├── snappy │ ├── hadoop │ │ └── BUILD │ ├── framed │ │ └── BUILD │ ├── snappy_streams.h │ └── BUILD ├── gcs │ ├── gcs_internal.h │ └── BUILD ├── containers │ └── BUILD ├── text │ └── BUILD ├── zstd │ └── BUILD └── csv │ └── BUILD ├── MANIFEST.in ├── doc └── index.md ├── .bazelrc ├── README.md ├── CONTRIBUTING.md ├── configure └── MODULE.bazel /python/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python/dummy_binary.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tf_dependency/BUILD: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python/riegeli/base/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /riegeli/.gitignore: 
-------------------------------------------------------------------------------- 1 | bazel-* 2 | -------------------------------------------------------------------------------- /python/riegeli/records/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python/riegeli/tensorflow/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python/riegeli/records/examples/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python/riegeli/records/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python/riegeli/tensorflow/ops/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python/MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include riegeli *.py 2 | -------------------------------------------------------------------------------- /python/riegeli/tensorflow/kernel_tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /riegeli/tensorflow/BUILD: -------------------------------------------------------------------------------- 1 | exports_files([ 2 | "kernels/riegeli_dataset_ops.cc", 3 | "ops/riegeli_dataset_ops.cc", 4 | ]) 5 | -------------------------------------------------------------------------------- 
/python/riegeli/records/tests/records_test.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package riegeli.tests; 4 | 5 | message SimpleMessage { 6 | optional int32 id = 1; 7 | optional bytes payload = 2; 8 | } 9 | -------------------------------------------------------------------------------- /riegeli/BUILD: -------------------------------------------------------------------------------- 1 | # Riegeli, file format for storing a sequence of records. 2 | 3 | package( 4 | default_visibility = ["//visibility:public"], 5 | features = ["header_modules"], 6 | ) 7 | 8 | licenses(["notice"]) 9 | 10 | exports_files(["LICENSE"]) 11 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include .bazelrc 2 | include *.md 3 | include LICENSE 4 | include MANIFEST.in 5 | include WORKSPACE 6 | include configure 7 | recursive-include doc * 8 | recursive-include python * 9 | recursive-include riegeli * 10 | recursive-include third_party * 11 | -------------------------------------------------------------------------------- /python/riegeli/records/examples/BUILD: -------------------------------------------------------------------------------- 1 | load("@rules_python//python:defs.bzl", "py_binary") 2 | 3 | package(features = ["header_modules"]) 4 | 5 | licenses(["notice"]) 6 | 7 | py_binary( 8 | name = "write_read_records", 9 | srcs = ["write_read_records.py"], 10 | deps = [ 11 | "//python/riegeli", 12 | "//python/riegeli/records/tests:records_test_py_pb2", 13 | ], 14 | ) 15 | -------------------------------------------------------------------------------- /riegeli/records/README.md: -------------------------------------------------------------------------------- 1 | # Summary 2 | 3 | Riegeli/records is a file format for storing a sequence of records. 
4 | 5 | The format supports sequential writing, appending to a previously created file, 6 | sequential reading, and seeking while reading. Data are optionally compressed, 7 | with special support for the case when records are proto messages. Data 8 | corruption is detected and reading can be resumed after skipping over a 9 | corrupted region. 10 | -------------------------------------------------------------------------------- /doc/index.md: -------------------------------------------------------------------------------- 1 | # Riegeli 2 | 3 | *Riegeli/records* is a file format for storing a sequence of string records, 4 | typically serialized protocol buffers. It supports dense compression, fast 5 | decoding, seeking, detection and optional skipping of data corruption, filtering 6 | of proto message fields for even faster decoding, and parallel encoding. 7 | 8 | * [Specification of Riegeli/records file format](riegeli_records_file_format.md). 9 | * [Specifying options for writing Riegeli/records files](record_writer_options.md). 10 | -------------------------------------------------------------------------------- /tf_dependency/BUILD.tpl: -------------------------------------------------------------------------------- 1 | package( 2 | default_visibility = ["//visibility:public"], 3 | features = ["header_modules"], 4 | ) 5 | 6 | cc_library( 7 | name = "tf_header_lib", 8 | hdrs = [":tf_header_include"], 9 | includes = ["include"], 10 | visibility = ["//visibility:public"], 11 | ) 12 | 13 | cc_library( 14 | name = "libtensorflow_framework", 15 | srcs = [":%{TF_SHARED_LIBRARY_NAME}"], 16 | visibility = ["//visibility:public"], 17 | ) 18 | 19 | %{TF_HEADER_GENRULE} 20 | %{TF_SHARED_LIBRARY_GENRULE} 21 | -------------------------------------------------------------------------------- /.bazelrc: -------------------------------------------------------------------------------- 1 | # Enable Bzlmod by default. 2 | common --enable_bzlmod 3 | 4 | # Use C++17. 
5 | build --cxxopt=-std=c++17 6 | build --host_cxxopt=-std=c++17 7 | 8 | # Make Python protos faster by backing them with C++ protos. 9 | # TODO: Reenable once protobuf releases 10 | # https://github.com/protocolbuffers/protobuf/pull/22633 11 | # i.e. in version > 32.0. Or possibly switch to upb. 12 | # build --define=use_fast_cpp_protos=true 13 | 14 | # Options from ./configure 15 | # This is currently disabled because TensorFlow does not support bzlmod, 16 | # hence Riegeli/TensorFlow bindings are broken anyway. 17 | # import %workspace%/configure.bazelrc 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Riegeli 2 | 3 | *Riegeli/records* is a file format for storing a sequence of string records, 4 | typically serialized protocol buffers. It supports dense compression, fast 5 | decoding, seeking, detection and optional skipping of data corruption, filtering 6 | of proto message fields for even faster decoding, and parallel encoding. 7 | 8 | See [documentation](https://github.com/google/riegeli/blob/master/doc/index.md). 9 | 10 | # Status 11 | 12 | Riegeli file format will only change in a backward compatible way (i.e. future 13 | readers will understand current files, but current readers might not understand 14 | files using future features). 15 | 16 | Riegeli C++ API might change in incompatible ways. 17 | -------------------------------------------------------------------------------- /python/README.md: -------------------------------------------------------------------------------- 1 | # Riegeli 2 | 3 | *Riegeli/records* is a file format for storing a sequence of string records, 4 | typically serialized protocol buffers. It supports dense compression, fast 5 | decoding, seeking, detection and optional skipping of data corruption, filtering 6 | of proto message fields for even faster decoding, and parallel encoding. 
7 | 8 | See [documentation](https://github.com/google/riegeli/blob/master/doc/index.md). 9 | 10 | # Status 11 | 12 | Riegeli file format will only change in a backward compatible way (i.e. future 13 | readers will understand current files, but current readers might not understand 14 | files using future features). 15 | 16 | Riegeli C++ API might change in incompatible ways. 17 | -------------------------------------------------------------------------------- /python/BUILD: -------------------------------------------------------------------------------- 1 | load("@rules_python//python:defs.bzl", "py_binary") 2 | 3 | package(default_visibility = ["//visibility:private"]) 4 | 5 | licenses(["notice"]) 6 | 7 | # These dependencies are gathered in a py_binary, instead of directly in 8 | # sh_binary data, so that bazel links __init__.py files to runfiles. 9 | py_binary( 10 | name = "dummy_binary", 11 | srcs = ["dummy_binary.py"], 12 | srcs_version = "PY3", 13 | deps = [ 14 | "//python/riegeli", 15 | "//python/riegeli/tensorflow:riegeli_dataset_ops", 16 | ], 17 | ) 18 | 19 | sh_binary( 20 | name = "build_pip_package", 21 | srcs = ["build_pip_package.sh"], 22 | data = [ 23 | "MANIFEST.in", 24 | "README.md", 25 | "setup.py", 26 | ":dummy_binary", 27 | ], 28 | ) 29 | -------------------------------------------------------------------------------- /python/riegeli/BUILD: -------------------------------------------------------------------------------- 1 | # Riegeli, file format for storing a sequence of records. 
2 | 3 | load("@rules_python//python:defs.bzl", "py_library") 4 | 5 | package( 6 | default_visibility = ["//visibility:public"], 7 | features = ["header_modules"], 8 | ) 9 | 10 | licenses(["notice"]) 11 | 12 | exports_files(["LICENSE"]) 13 | 14 | py_library( 15 | name = "riegeli", 16 | srcs = ["__init__.py"], 17 | imports = [".."], 18 | deps = [ 19 | "//python/riegeli/base:riegeli_error", 20 | "//python/riegeli/records:record_position", 21 | "//python/riegeli/records:record_reader", 22 | "//python/riegeli/records:record_writer", 23 | "//python/riegeli/records:records_metadata_py_pb2", 24 | "//python/riegeli/records:skipped_region", 25 | ], 26 | ) 27 | -------------------------------------------------------------------------------- /riegeli/chunk_encoding/README.md: -------------------------------------------------------------------------------- 1 | # Purpose 2 | 3 | Riegeli/transpose transforms protocol buffer byte streams into a custom data 4 | format that can be compressed around 20% more densely. The additional transform 5 | slows down both compression and decompression by around 50%. Often this is still 6 | a desirable trade-off, but in the end it depends on the compressed data and 7 | other system requirements such as latency vs. resource use. 8 | 9 | # Detailed design 10 | 11 | Transposition of a set of protocol buffers means that we associate a container 12 | with each tag. Then all the values corresponding to a specific tag are stored in 13 | the container associated with it. Invocation of a general purpose compression 14 | algorithm on the concatenation of these containers offers better compression 15 | ratios than its invocation on the concatenation of the original binary encoding 16 | of the protocol buffers. 
17 | -------------------------------------------------------------------------------- /python/riegeli/records/tests/BUILD: -------------------------------------------------------------------------------- 1 | load("@com_google_protobuf//bazel:proto_library.bzl", "proto_library") 2 | load("@com_google_protobuf//bazel:py_proto_library.bzl", "py_proto_library") 3 | load("@rules_python//python:defs.bzl", "py_test") 4 | 5 | package( 6 | default_visibility = [ 7 | "//python/riegeli:__subpackages__", 8 | ], 9 | features = ["header_modules"], 10 | ) 11 | 12 | licenses(["notice"]) 13 | 14 | py_test( 15 | name = "records_test", 16 | srcs = ["records_test.py"], 17 | deps = [ 18 | ":records_test_py_pb2", 19 | "//python/riegeli", 20 | "@absl_py//absl/logging", 21 | "@absl_py//absl/testing:absltest", 22 | "@absl_py//absl/testing:parameterized", 23 | "@com_google_protobuf//:protobuf_python", 24 | ], 25 | ) 26 | 27 | proto_library( 28 | name = "records_test_proto", 29 | srcs = ["records_test.proto"], 30 | ) 31 | 32 | py_proto_library( 33 | name = "records_test_py_pb2", 34 | deps = ["records_test_proto"], 35 | ) 36 | -------------------------------------------------------------------------------- /riegeli/bzip2/bzip2_error.h: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #ifndef RIEGELI_BZIP2_BZIP2_ERROR_H_ 16 | #define RIEGELI_BZIP2_BZIP2_ERROR_H_ 17 | 18 | #include "absl/status/status.h" 19 | #include "absl/strings/string_view.h" 20 | 21 | namespace riegeli::bzip2_internal { 22 | 23 | absl::Status Bzip2ErrorToStatus(absl::string_view operation, int bzlib_code); 24 | 25 | } // namespace riegeli::bzip2_internal 26 | 27 | #endif // RIEGELI_BZIP2_BZIP2_ERROR_H_ 28 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement. You (or your employer) retain the copyright to your contribution, 10 | this simply gives us permission to use and redistribute your contributions as 11 | part of the project. Head over to <https://cla.developers.google.com/> to see 12 | your current agreements on file or to sign a new one. 13 | 14 | You generally only need to submit a CLA once, so if you've already submitted one 15 | (even if it was for a different project), you probably don't need to do it 16 | again. 17 | 18 | ## Code reviews 19 | 20 | All submissions, including submissions by project members, require review. We 21 | use GitHub pull requests for this purpose. Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 24 | -------------------------------------------------------------------------------- /riegeli/base/chain.h: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef RIEGELI_BASE_CHAIN_H_ 16 | #define RIEGELI_BASE_CHAIN_H_ 17 | 18 | #include "riegeli/base/chain_base.h" // IWYU pragma: export 19 | #include "riegeli/base/chain_details.h" // IWYU pragma: export 20 | #include "riegeli/base/external_ref_base.h" // IWYU pragma: keep 21 | #include "riegeli/base/external_ref_support.h" // IWYU pragma: keep 22 | 23 | #endif // RIEGELI_BASE_CHAIN_H_ 24 | -------------------------------------------------------------------------------- /riegeli/base/external_ref.h: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #ifndef RIEGELI_BASE_EXTERNAL_REF_H_ 16 | #define RIEGELI_BASE_EXTERNAL_REF_H_ 17 | 18 | #include "riegeli/base/chain_base.h" // IWYU pragma: keep 19 | #include "riegeli/base/chain_details.h" // IWYU pragma: keep 20 | #include "riegeli/base/external_ref_base.h" // IWYU pragma: export 21 | #include "riegeli/base/external_ref_support.h" // IWYU pragma: export 22 | 23 | #endif // RIEGELI_BASE_EXTERNAL_REF_H_ 24 | -------------------------------------------------------------------------------- /riegeli/records/skipped_region.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #include "riegeli/records/skipped_region.h" 16 | 17 | #include 18 | #include 19 | 20 | #include "absl/strings/str_cat.h" 21 | 22 | namespace riegeli { 23 | 24 | std::string SkippedRegion::ToString() const { 25 | return absl::StrCat("[", begin_, "..", end_, "): ", message_); 26 | } 27 | 28 | void SkippedRegion::Output(std::ostream& dest) const { dest << ToString(); } 29 | 30 | } // namespace riegeli 31 | -------------------------------------------------------------------------------- /riegeli/xz/xz_error.h: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #ifndef RIEGELI_XZ_XZ_ERROR_H_ 16 | #define RIEGELI_XZ_XZ_ERROR_H_ 17 | 18 | #include "absl/status/status.h" 19 | #include "absl/strings/string_view.h" 20 | #include "lzma.h" 21 | 22 | namespace riegeli::xz_internal { 23 | 24 | absl::Status XzErrorToStatus(absl::string_view operation, 25 | lzma_ret liblzma_code); 26 | 27 | } // namespace riegeli::xz_internal 28 | 29 | #endif // RIEGELI_XZ_XZ_ERROR_H_ 30 | -------------------------------------------------------------------------------- /riegeli/chunk_encoding/hash.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #ifndef RIEGELI_CHUNK_ENCODING_HASH_H_ 16 | #define RIEGELI_CHUNK_ENCODING_HASH_H_ 17 | 18 | #include 19 | 20 | #include "absl/strings/string_view.h" 21 | #include "riegeli/base/chain.h" 22 | 23 | namespace riegeli::chunk_encoding_internal { 24 | 25 | uint64_t Hash(absl::string_view data); 26 | uint64_t Hash(const Chain& data); 27 | 28 | } // namespace riegeli::chunk_encoding_internal 29 | 30 | #endif // RIEGELI_CHUNK_ENCODING_HASH_H_ 31 | -------------------------------------------------------------------------------- /riegeli/varint/BUILD: -------------------------------------------------------------------------------- 1 | load("@rules_cc//cc:defs.bzl", "cc_library") 2 | 3 | package( 4 | default_visibility = ["//visibility:public"], 5 | features = ["header_modules"], 6 | ) 7 | 8 | licenses(["notice"]) 9 | 10 | cc_library( 11 | name = "varint_reading", 12 | srcs = [ 13 | "varint_internal.h", 14 | "varint_reading.cc", 15 | ], 16 | hdrs = ["varint_reading.h"], 17 | deps = [ 18 | "//riegeli/base:arithmetic", 19 | "//riegeli/base:assert", 20 | "//riegeli/bytes:reader", 21 | "@com_google_absl//absl/base:core_headers", 22 | "@com_google_absl//absl/strings:cord", 23 | "@com_google_absl//absl/strings:string_view", 24 | ], 25 | ) 26 | 27 | cc_library( 28 | name = "varint_writing", 29 | srcs = ["varint_internal.h"], 30 | hdrs = ["varint_writing.h"], 31 | deps = [ 32 | "//riegeli/base:arithmetic", 33 | "//riegeli/base:constexpr", 34 | "//riegeli/bytes:backward_writer", 35 | "//riegeli/bytes:writer", 36 | "@com_google_absl//absl/base:core_headers", 37 | "@com_google_absl//absl/numeric:bits", 38 | ], 39 | ) 40 | -------------------------------------------------------------------------------- /riegeli/varint/varint_internal.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance 
with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef RIEGELI_VARINT_VARINT_INTERNAL_H_ 16 | #define RIEGELI_VARINT_VARINT_INTERNAL_H_ 17 | 18 | // IWYU pragma: private, include "riegeli/varint/varint_reading.h" 19 | // IWYU pragma: private, include "riegeli/varint/varint_writing.h" 20 | 21 | #include 22 | 23 | namespace riegeli { 24 | 25 | inline constexpr size_t kMaxLengthVarint32 = 5; 26 | inline constexpr size_t kMaxLengthVarint64 = 10; 27 | 28 | } // namespace riegeli 29 | 30 | #endif // RIEGELI_VARINT_VARINT_INTERNAL_H_ 31 | -------------------------------------------------------------------------------- /riegeli/ordered_varint/BUILD: -------------------------------------------------------------------------------- 1 | load("@rules_cc//cc:defs.bzl", "cc_library") 2 | 3 | package( 4 | default_visibility = ["//visibility:public"], 5 | features = ["header_modules"], 6 | ) 7 | 8 | licenses(["notice"]) 9 | 10 | cc_library( 11 | name = "ordered_varint_reading", 12 | srcs = [ 13 | "ordered_varint_internal.h", 14 | "ordered_varint_reading.cc", 15 | ], 16 | hdrs = ["ordered_varint_reading.h"], 17 | deps = [ 18 | "//riegeli/base:assert", 19 | "//riegeli/bytes:reader", 20 | "//riegeli/endian:endian_reading", 21 | "@com_google_absl//absl/base:core_headers", 22 | ], 23 | ) 24 | 25 | cc_library( 26 | name = "ordered_varint_writing", 27 | srcs = [ 28 | "ordered_varint_internal.h", 29 | "ordered_varint_writing.cc", 30 | ], 31 | hdrs = ["ordered_varint_writing.h"], 32 | deps = [ 33 | "//riegeli/base:arithmetic", 34 | "//riegeli/base:assert", 35 | 
"//riegeli/bytes:writer", 36 | "//riegeli/endian:endian_writing", 37 | "@com_google_absl//absl/base:core_headers", 38 | "@com_google_absl//absl/numeric:bits", 39 | ], 40 | ) 41 | -------------------------------------------------------------------------------- /riegeli/bytes/cfile_internal.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "riegeli/bytes/cfile_internal.h" 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include "absl/base/optimization.h" 22 | #include "riegeli/bytes/fd_internal.h" 23 | 24 | namespace riegeli::cfile_internal { 25 | 26 | std::string FilenameForCFile(FILE* file) { 27 | const int fd = fileno(file); 28 | if (ABSL_PREDICT_FALSE(fd < 0)) { 29 | return ""; 30 | } else { 31 | return fd_internal::FilenameForFd(fd); 32 | } 33 | } 34 | 35 | } // namespace riegeli::cfile_internal 36 | -------------------------------------------------------------------------------- /riegeli/base/string_utils.h: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef RIEGELI_BASE_STRING_UTILS_H_ 16 | #define RIEGELI_BASE_STRING_UTILS_H_ 17 | 18 | #include 19 | 20 | #include 21 | 22 | #include "absl/base/nullability.h" 23 | 24 | ABSL_POINTERS_DEFAULT_NONNULL 25 | 26 | namespace riegeli { 27 | 28 | // Resizes `dest` to `new_size`, ensuring that repeated growth has the cost 29 | // proportional to the final size. New contents are unspecified. 30 | void ResizeStringAmortized(std::string& dest, size_t new_size); 31 | 32 | } // namespace riegeli 33 | 34 | #endif // RIEGELI_BASE_STRING_UTILS_H_ 35 | -------------------------------------------------------------------------------- /riegeli/zlib/zlib_error.h: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
// Declares the conversion of a zlib failure to an `absl::Status`; the
// definition is not in this header.
//
// NOTE(review): the parameter meanings below are inferred from their names
// only — confirm against the definition:
//   `operation` - presumably the name of the zlib call that failed.
//   `zlib_code` - presumably the zlib return code.
//   `details`   - presumably an optional message from zlib.
//
// Annotated with `ABSL_ATTRIBUTE_COLD`, i.e. declared as unlikely to be
// called.
ABSL_ATTRIBUTE_COLD absl::Status ZlibErrorToStatus(absl::string_view operation,
                                                   int zlib_code,
                                                   const char* details);
# Python sources of the `riegeli_error` module. The `:utils` C++ library in
# this package lists this target in its `data`, because the module is imported
# from C++.
py_library(
    name = "riegeli_error",
    srcs = ["riegeli_error.py"],
)
14 | 15 | #include "riegeli/brotli/brotli_allocator.h" 16 | 17 | #include 18 | 19 | namespace riegeli { 20 | 21 | namespace brotli_internal { 22 | 23 | void* RiegeliBrotliAllocFunc(void* opaque, size_t size) { 24 | return static_cast(opaque)->Alloc(size); 25 | } 26 | 27 | void RiegeliBrotliFreeFunc(void* opaque, void* ptr) { 28 | static_cast(opaque)->Free(ptr); 29 | } 30 | 31 | } // namespace brotli_internal 32 | 33 | BrotliAllocator::Interface::~Interface() {} 34 | 35 | } // namespace riegeli 36 | -------------------------------------------------------------------------------- /riegeli/base/external_data.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #include "riegeli/base/external_data.h" 16 | 17 | #include 18 | 19 | #include "absl/strings/string_view.h" 20 | 21 | namespace riegeli { 22 | 23 | ExternalData ExternalDataCopy(absl::string_view data) { 24 | char* storage = nullptr; 25 | if (!data.empty()) { 26 | storage = static_cast(operator new(data.size())); 27 | std::memcpy(storage, data.data(), data.size()); 28 | } 29 | return ExternalData{ExternalStorage(storage, operator delete), 30 | absl::string_view(storage, data.size())}; 31 | } 32 | 33 | } // namespace riegeli 34 | -------------------------------------------------------------------------------- /riegeli/digests/sha256_digester.h: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef RIEGELI_DIGESTS_SHA256_DIGESTER_H_ 16 | #define RIEGELI_DIGESTS_SHA256_DIGESTER_H_ 17 | 18 | #include "openssl/base.h" 19 | #include "openssl/sha.h" 20 | #include "riegeli/digests/openssl_digester.h" 21 | 22 | namespace riegeli { 23 | 24 | // A digester computing SHA-256 checksums, for `DigestingReader` and 25 | // `DigestingWriter`. 
// A digester computing SHA-512 checksums, for `DigestingReader` and
// `DigestingWriter`.
//
// NOTE(review): `OpenSslDigester` appears here without template arguments,
// and the embedded line numbering skips a line right after this declaration,
// so the argument list was most likely lost when this copy was produced.
// Restore it from the upstream header before compiling.
using Sha512Digester = OpenSslDigester;
// `RIEGELI_INTERNAL_HAS_BUILTIN(x)` expands to `__has_builtin(x)` when the
// compiler provides `__has_builtin` (Clang does), and to 0 otherwise; other
// compilers need other means to detect availability of builtins.
#if !defined(__has_builtin)
#define RIEGELI_INTERNAL_HAS_BUILTIN(x) 0
#else
#define RIEGELI_INTERNAL_HAS_BUILTIN(x) __has_builtin(x)
#endif

// `RIEGELI_INTERNAL_IS_GCC_VERSION(major, minor)` is true when
// `__GNUC__.__GNUC_MINOR__` is at least `major.minor`.
#define RIEGELI_INTERNAL_IS_GCC_VERSION(major, minor) \
  ((__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)) || __GNUC__ > (major))
namespace riegeli {

// Length limits, in bytes, for ordered varints.
// NOTE(review): "maximum encoded length of a 32-bit / 64-bit ordered varint"
// is read off the constant names — confirm against the reading/writing
// headers named in the IWYU pragmas of this file.
inline constexpr size_t kMaxLengthOrderedVarint32 = 5;
inline constexpr size_t kMaxLengthOrderedVarint64 = 9;

}  // namespace riegeli
26 | #if RIEGELI_INTERNAL_HAS_BUILTIN(__builtin_constant_p) || \ 27 | RIEGELI_INTERNAL_IS_GCC_VERSION(3, 1) 28 | #define RIEGELI_IS_CONSTANT(expr) __builtin_constant_p(expr) 29 | #else 30 | #define RIEGELI_IS_CONSTANT(expr) false 31 | #endif 32 | 33 | } // namespace riegeli 34 | 35 | #endif // RIEGELI_BASE_CONSTEXPR_H_ 36 | -------------------------------------------------------------------------------- /riegeli/digests/sha512_256_digester.h: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef RIEGELI_DIGESTS_SHA512_256_DIGESTER_H_ 16 | #define RIEGELI_DIGESTS_SHA512_256_DIGESTER_H_ 17 | 18 | #include "openssl/base.h" 19 | #include "openssl/sha.h" 20 | #include "riegeli/digests/openssl_digester.h" 21 | 22 | namespace riegeli { 23 | 24 | // A digester computing SHA-512/256 checksums, for `DigestingReader` and 25 | // `DigestingWriter`. 
26 | using Sha512_256Digester = 27 | OpenSslDigester; 29 | 30 | } // namespace riegeli 31 | 32 | #endif // RIEGELI_DIGESTS_SHA512_256_DIGESTER_H_ 33 | -------------------------------------------------------------------------------- /riegeli/base/buffer.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "riegeli/base/buffer.h" 16 | 17 | #include 18 | 19 | #include "absl/strings/string_view.h" 20 | #include "riegeli/base/arithmetic.h" 21 | 22 | namespace riegeli { 23 | 24 | void Buffer::DumpStructure(absl::string_view substr, std::ostream& dest) const { 25 | dest << "[buffer] {"; 26 | if (!substr.empty()) { 27 | if (substr.data() != data()) { 28 | dest << " space_before: " << PtrDistance(data(), substr.data()); 29 | } 30 | dest << " space_after: " 31 | << PtrDistance(substr.data() + substr.size(), data() + capacity()); 32 | } 33 | dest << " }"; 34 | } 35 | 36 | } // namespace riegeli 37 | -------------------------------------------------------------------------------- /riegeli/bytes/cfile_internal.h: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef RIEGELI_BYTES_CFILE_INTERNAL_H_ 16 | #define RIEGELI_BYTES_CFILE_INTERNAL_H_ 17 | 18 | #include 19 | 20 | #include 21 | 22 | namespace riegeli::cfile_internal { 23 | 24 | // Infers a filename from the fd corresponding to the `FILE` by reading the 25 | // symlink target for `absl::StrCat("/proc/self/fd/", fd)` (on Windows returns 26 | // a `absl::StrCat("")` placeholder instead), or returning 27 | // "" if there is no corresponding fd. 28 | std::string FilenameForCFile(FILE* file); 29 | 30 | } // namespace riegeli::cfile_internal 31 | 32 | #endif // RIEGELI_BYTES_CFILE_INTERNAL_H_ 33 | -------------------------------------------------------------------------------- /riegeli/base/errno_mapping.h: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
// Converts `absl::StatusCode` to `errno` value.
int StatusCodeToErrno(absl::StatusCode status_code);

#ifdef _WIN32
// Declared only when `_WIN32` is defined.
// NOTE(review): judging by the name and signature, this builds an
// `absl::Status` from a Windows error number and an accompanying `message`;
// the definition is not in this header — confirm there.
absl::Status WindowsErrorToStatus(uint32_t error_number,
                                  absl::string_view message);
#endif  // _WIN32
namespace riegeli {

// Selects one of the two fields of a synthetic map entry message: the key
// (value 1) or the value (value 2).
enum MapEntryField {
  kMapEntryKey = 1,
  kMapEntryValue = 2,
};

}  // namespace riegeli
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "riegeli/base/string_utils.h" 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include "absl/base/nullability.h" 22 | #include "riegeli/base/arithmetic.h" 23 | 24 | ABSL_POINTERS_DEFAULT_NONNULL 25 | 26 | namespace riegeli { 27 | 28 | void ResizeStringAmortized(std::string& dest, size_t new_size) { 29 | if (new_size > dest.capacity()) { 30 | dest.reserve( 31 | dest.capacity() == std::string().capacity() 32 | ? new_size 33 | : UnsignedMax(new_size, 34 | UnsignedMin(dest.capacity() + dest.capacity() / 2, 35 | dest.max_size()))); 36 | } 37 | dest.resize(new_size); 38 | } 39 | 40 | } // namespace riegeli 41 | -------------------------------------------------------------------------------- /riegeli/digests/md5_digester.h: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
// A digester computing MD5 checksums, for `DigestingReader` and
// `DigestingWriter`.
//
// Warning: MD5 as a cryptographic hash function is broken.
// Use this only if a preexisting format has already decided to use MD5.
// Please contact ise-team@ in case of doubt.
//
// NOTE(review): `OpenSslDigester` appears here without template arguments,
// and the embedded line numbering skips a line right after this declaration,
// so the argument list was most likely lost when this copy was produced.
// Restore it from the upstream header before compiling.
using Md5Digester = OpenSslDigester;
// Chunk type tags, each stored as a single byte.
//
// These values are frozen in the file format: never renumber or reuse them.
enum class ChunkType : uint8_t {
  kFileSignature = 's',
  kFileMetadata = 'm',
  kPadding = 'p',
  kSimple = 'r',
  kTransposed = 't',
};
// Compression type tags, each stored as a single byte. The values are part of
// the file format and must not change.
enum class CompressionType : uint8_t {
  kNone = 0,
  kBrotli = 'b',
  kZstd = 'z',
  kSnappy = 's',
};

// The maximum `uint64_t` value shifted right by 8 bits, i.e. 2^56 - 1.
// `std::numeric_limits` needs an explicit template argument; `uint64_t`
// matches the declared type of the constant.
inline constexpr uint64_t kMaxNumRecords =
    std::numeric_limits<uint64_t>::max() >> 8;
// A digester computing Adler32 checksums, for `DigestingReader` and
// `DigestingWriter`.
class Adler32Digester {
 public:
  // Starts from the seed 1, by delegating to the seeded constructor.
  Adler32Digester() : Adler32Digester(1) {}

  // Starts from an explicit `seed`. Defined out of line.
  explicit Adler32Digester(uint32_t seed);

  Adler32Digester(const Adler32Digester& that) = default;
  Adler32Digester& operator=(const Adler32Digester& that) = default;

  // Feeds `src` into the checksum. Defined out of line.
  void Write(absl::string_view src);
  // Returns the current checksum state.
  uint32_t Digest() { return adler_; }

 private:
  // Running checksum state.
  uint32_t adler_;
};
14 | 15 | #include "riegeli/base/shared_buffer.h" 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include "absl/strings/string_view.h" 22 | #include "riegeli/base/arithmetic.h" 23 | 24 | namespace riegeli { 25 | 26 | void SharedBuffer::DumpStructure(absl::string_view substr, 27 | std::ostream& dest) const { 28 | dest << "[shared_buffer] {"; 29 | const size_t ref_count = GetRefCount(); 30 | if (ref_count != 1) dest << " ref_count: " << ref_count; 31 | if (!substr.empty()) { 32 | if (substr.data() != data()) { 33 | dest << " space_before: " << PtrDistance(data(), substr.data()); 34 | } 35 | dest << " space_after: " 36 | << PtrDistance(substr.data() + substr.size(), data() + capacity()); 37 | } 38 | dest << " }"; 39 | } 40 | 41 | } // namespace riegeli 42 | -------------------------------------------------------------------------------- /riegeli/digests/adler32_digester.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "riegeli/digests/adler32_digester.h" 16 | 17 | #include 18 | 19 | #include "absl/base/optimization.h" 20 | #include "absl/strings/string_view.h" 21 | #include "riegeli/base/arithmetic.h" 22 | #include "zconf.h" 23 | #include "zlib.h" 24 | 25 | namespace riegeli { 26 | 27 | Adler32Digester::Adler32Digester(uint32_t seed) : adler_(seed) { 28 | // This checks CPU features. 
29 | adler32_z(0, nullptr, 0); 30 | } 31 | 32 | void Adler32Digester::Write(absl::string_view src) { 33 | if (ABSL_PREDICT_FALSE(src.empty())) { 34 | // `adler32_z(state, nullptr, 0)` exceptionally returns 1, not `state`. 35 | return; 36 | } 37 | adler_ = IntCast(adler32_z( 38 | IntCast(adler_), reinterpret_cast(src.data()), 39 | IntCast(src.size()))); 40 | } 41 | 42 | } // namespace riegeli 43 | -------------------------------------------------------------------------------- /python/riegeli/BUILD.tpl: -------------------------------------------------------------------------------- 1 | load("@bazel_tools//tools/python:toolchain.bzl", "py_runtime_pair") 2 | 3 | licenses(["restricted"]) 4 | 5 | package( 6 | default_visibility = ["//visibility:public"], 7 | features = ["header_modules"], 8 | ) 9 | 10 | toolchain( 11 | name = "toolchain", 12 | toolchain = ":py_runtime_pair", 13 | toolchain_type = "@bazel_tools//tools/python:toolchain_type", 14 | ) 15 | 16 | # To build Python C/C++ extension on Windows, we need to link to python import library pythonXY.lib 17 | # See https://docs.python.org/3/extending/windows.html 18 | cc_import( 19 | name = "python_lib", 20 | interface_library = select({ 21 | ":windows": ":python_import_lib", 22 | # A placeholder for Unix platforms which makes --no_build happy. 
23 | "//conditions:default": "not-existing.lib", 24 | }), 25 | system_provided = 1, 26 | ) 27 | 28 | cc_library( 29 | name = "python_headers", 30 | hdrs = [":python_include"], 31 | deps = select({ 32 | ":windows": [":python_lib"], 33 | "//conditions:default": [], 34 | }), 35 | includes = ["python_include"], 36 | ) 37 | 38 | cc_library( 39 | name = "numpy_headers", 40 | hdrs = [":numpy_include"], 41 | includes = ["numpy_include"], 42 | ) 43 | 44 | config_setting( 45 | name = "windows", 46 | values = {"cpu": "x64_windows"}, 47 | visibility = ["//visibility:public"], 48 | ) 49 | 50 | %{PYTHON_RUNTIME_PAIR} 51 | %{PYTHON_INCLUDE_GENRULE} 52 | %{NUMPY_INCLUDE_GENRULE} 53 | %{PYTHON_IMPORT_LIB_GENRULE} 54 | -------------------------------------------------------------------------------- /riegeli/digests/crc32_digester.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "riegeli/digests/crc32_digester.h" 16 | 17 | #include 18 | 19 | #include "absl/base/optimization.h" 20 | #include "absl/strings/string_view.h" 21 | #include "riegeli/base/arithmetic.h" 22 | #include "zconf.h" 23 | #include "zlib.h" 24 | 25 | namespace riegeli { 26 | 27 | Crc32Digester::Crc32Digester(uint32_t seed) : crc_(seed) { 28 | // This checks CPU features. 
29 | crc32_z(0, nullptr, 0); 30 | } 31 | 32 | void Crc32Digester::Write(absl::string_view src) { 33 | if (ABSL_PREDICT_FALSE(src.empty())) { 34 | // `crc32_z(state, nullptr, 0)` exceptionally returns 0, not `state`. 35 | return; 36 | } 37 | crc_ = IntCast(crc32_z(IntCast(crc_), 38 | reinterpret_cast(src.data()), 39 | IntCast(src.size()))); 40 | } 41 | 42 | } // namespace riegeli 43 | -------------------------------------------------------------------------------- /riegeli/base/status.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "riegeli/base/status.h" 16 | 17 | #include "absl/status/status.h" 18 | #include "absl/strings/cord.h" 19 | #include "absl/strings/str_cat.h" 20 | #include "absl/strings/string_view.h" 21 | 22 | namespace riegeli { 23 | 24 | absl::Status SetMessage(const absl::Status& status, absl::string_view message) { 25 | absl::Status result(status.code(), message); 26 | status.ForEachPayload( 27 | [&](absl::string_view type_url, const absl::Cord& payload) { 28 | result.SetPayload(type_url, payload); 29 | }); 30 | return result; 31 | } 32 | 33 | absl::Status Annotate(const absl::Status& status, absl::string_view detail) { 34 | if (status.ok() || detail.empty()) return status; 35 | return SetMessage(status, status.message().empty() 36 | ? 
detail 37 | : absl::StrCat(status.message(), "; ", detail)); 38 | } 39 | 40 | } // namespace riegeli 41 | -------------------------------------------------------------------------------- /riegeli/base/unicode.h: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef RIEGELI_BASE_UNICODE_H_ 16 | #define RIEGELI_BASE_UNICODE_H_ 17 | 18 | #ifdef _WIN32 19 | 20 | #include 21 | 22 | #include "absl/base/nullability.h" 23 | #include "absl/strings/string_view.h" 24 | #include "absl/types/span.h" 25 | 26 | ABSL_POINTERS_DEFAULT_NONNULL 27 | 28 | namespace riegeli { 29 | 30 | // Converts from UTF-8 string to `wchar_t[]`. 31 | // 32 | // Returns `false` on failure. 33 | bool Utf8ToWide(absl::string_view src, std::wstring& dest); 34 | 35 | // Converts from `wchar_t[]` to UTF-8 string. 36 | // 37 | // Returns `false` on failure. 38 | bool WideToUtf8(absl::Span src, std::string& dest); 39 | 40 | // Converts from `wchar_t[]` to UTF-8 string. 41 | // 42 | // Emits replacement characters on failure. 
43 | std::string WideToUtf8Lossy(absl::Span src); 44 | 45 | } // namespace riegeli 46 | 47 | #endif // _WIN32 48 | 49 | #endif // RIEGELI_BASE_UNICODE_H_ 50 | -------------------------------------------------------------------------------- /riegeli/records/records_metadata.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package riegeli; 4 | 5 | import "google/protobuf/descriptor.proto"; 6 | 7 | // Information about a Riegeli/records file, which may be helpful to interpret 8 | // file contents. 9 | message RecordsMetadata { 10 | // Human-readable explanation of what the file contains. 11 | optional string file_comment = 1; 12 | 13 | // If records are proto messages of a fixed type, the full name of their type. 14 | optional string record_type_name = 2; 15 | 16 | // If `record_type_name` is set, proto file descriptors which should contain 17 | // the definition of that type and their dependencies (each file comes after 18 | // all its dependencies). 19 | // 20 | // If `file_descriptor` is empty but `record_type_name` is set (not 21 | // recommended), `record_type_name` can be interpreted in the context of an 22 | // unspecified proto descriptor database. 23 | repeated google.protobuf.FileDescriptorProto file_descriptor = 3; 24 | 25 | // Options originally used to encode the file: 26 | // https://github.com/google/riegeli/blob/master/doc/record_writer_options.md 27 | // 28 | // They are informative here, they are never necessary to decode the file. 29 | optional string record_writer_options = 4; 30 | 31 | // Number of records in the file, so that the reader can tune for it. 32 | // 33 | // This is informative, the actual number of records may differ. 34 | optional int64 num_records = 5; 35 | 36 | // Clients can define custom metadata in extensions of this message. 
37 | extensions 1000 to max; 38 | } 39 | -------------------------------------------------------------------------------- /python/riegeli/records/records_metadata.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package riegeli; 4 | 5 | import "google/protobuf/descriptor.proto"; 6 | 7 | // Information about a Riegeli/records file, which may be helpful to interpret 8 | // file contents. 9 | message RecordsMetadata { 10 | // Human-readable explanation of what the file contains. 11 | optional string file_comment = 1; 12 | 13 | // If records are proto messages of a fixed type, the full name of their type. 14 | optional string record_type_name = 2; 15 | 16 | // If `record_type_name` is set, proto file descriptors which should contain 17 | // the definition of that type and their dependencies (each file comes after 18 | // all its dependencies). 19 | // 20 | // If `file_descriptor` is empty but `record_type_name` is set (not 21 | // recommended), `record_type_name` can be interpreted in the context of an 22 | // unspecified proto descriptor database. 23 | repeated google.protobuf.FileDescriptorProto file_descriptor = 3; 24 | 25 | // Options originally used to encode the file: 26 | // https://github.com/google/riegeli/blob/master/doc/record_writer_options.md 27 | // 28 | // They are informative here, they are never necessary to decode the file. 29 | optional string record_writer_options = 4; 30 | 31 | // Number of records in the file, so that the reader can tune for it. 32 | // 33 | // This is informative, the actual number of records may differ. 34 | optional int64 num_records = 5; 35 | 36 | // Clients can define custom metadata in extensions of this message. 
37 | extensions 1000 to max; 38 | } 39 | -------------------------------------------------------------------------------- /riegeli/bytes/fd_internal.h: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef RIEGELI_BYTES_FD_INTERNAL_H_ 16 | #define RIEGELI_BYTES_FD_INTERNAL_H_ 17 | 18 | #ifndef __APPLE__ 19 | #include 20 | #endif 21 | 22 | #include 23 | 24 | namespace riegeli::fd_internal { 25 | 26 | // Infers a filename from fd by reading the symlink target for 27 | // `absl::StrCat("/proc/self/fd/", fd)` (on Windows returns a 28 | // `absl::StrCat("")` placeholder instead). 29 | std::string FilenameForFd(int fd); 30 | 31 | #ifndef _WIN32 32 | #ifndef __APPLE__ 33 | inline constexpr int kCloseOnExec = O_CLOEXEC; 34 | #else // __APPLE__ 35 | // On Darwin `O_CLOEXEC` is available conditionally, so `kCloseOnExec` is 36 | // defined out of line. 
37 | extern const int kCloseOnExec; 38 | #endif // __APPLE__ 39 | #else // _WIN32 40 | inline constexpr int kCloseOnExec = _O_NOINHERIT; 41 | #endif // _WIN32 42 | 43 | } // namespace riegeli::fd_internal 44 | 45 | #endif // RIEGELI_BYTES_FD_INTERNAL_H_ 46 | -------------------------------------------------------------------------------- /riegeli/chunk_encoding/decompressor.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #include "riegeli/chunk_encoding/decompressor.h" 16 | 17 | #include 18 | #include 19 | 20 | #include 21 | 22 | #include "absl/base/optimization.h" 23 | #include "riegeli/base/chain.h" 24 | #include "riegeli/bytes/chain_reader.h" 25 | #include "riegeli/chunk_encoding/constants.h" 26 | #include "riegeli/varint/varint_reading.h" 27 | 28 | namespace riegeli::chunk_encoding_internal { 29 | 30 | std::optional UncompressedSize(const Chain& compressed_data, 31 | CompressionType compression_type) { 32 | if (compression_type == CompressionType::kNone) return compressed_data.size(); 33 | ChainReader<> compressed_data_reader(&compressed_data); 34 | uint64_t size; 35 | if (ABSL_PREDICT_FALSE(!ReadVarint64(compressed_data_reader, size))) { 36 | return std::nullopt; 37 | } 38 | return size; 39 | } 40 | 41 | } // namespace riegeli::chunk_encoding_internal 42 | -------------------------------------------------------------------------------- /riegeli/digests/highwayhash_digester.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #include "riegeli/digests/highwayhash_digester.h" 16 | 17 | #include "highwayhash/hh_types.h" 18 | 19 | namespace riegeli { 20 | 21 | template <> 22 | alignas(32) const HighwayHashKey 23 | HighwayHashDigester::kDefaultKey = { 24 | 0x4ea9929a25d561c6, 25 | 0x98470d187b523e8f, 26 | 0x592040a2da3c4b53, 27 | 0xbff8b246e3c587a2, 28 | }; 29 | 30 | template <> 31 | alignas(32) const HighwayHashKey 32 | HighwayHashDigester::kDefaultKey = { 33 | 0x025ed8a16fb5f783, 34 | 0xb44bc74d89d26c86, 35 | 0x111ea964039fa769, 36 | 0x6f7d7159e15612b6, 37 | }; 38 | 39 | template <> 40 | alignas(32) const HighwayHashKey 41 | HighwayHashDigester::kDefaultKey = { 42 | 0x93fee04321119357, 43 | 0x21e397ea62c264b6, 44 | 0x9d856914f2ad0e15, 45 | 0x64dca6f86247f384, 46 | }; 47 | 48 | } // namespace riegeli 49 | -------------------------------------------------------------------------------- /python/riegeli/tensorflow/BUILD: -------------------------------------------------------------------------------- 1 | load("@rules_cc//cc:defs.bzl", "cc_binary") 2 | load("@rules_python//python:defs.bzl", "py_library", "py_test") 3 | 4 | package( 5 | default_visibility = ["//visibility:public"], 6 | features = ["header_modules"], 7 | ) 8 | 9 | licenses(["notice"]) 10 | 11 | py_library( 12 | name = "riegeli_dataset_ops", 13 | srcs = ["ops/riegeli_dataset_ops.py"], 14 | data = [":ops/_riegeli_dataset_ops.so"], 15 | ) 16 | 17 | cc_binary( 18 | name = "ops/_riegeli_dataset_ops.so", 19 | srcs = [ 20 | "//riegeli/tensorflow:kernels/riegeli_dataset_ops.cc", 21 | "//riegeli/tensorflow:ops/riegeli_dataset_ops.cc", 22 | ], 23 | # tensorflow/core/lib/core/refcount.h needs NDEBUG consistency between 24 | # translation units. 
25 | copts = ["-DNDEBUG"], 26 | linkshared = True, 27 | deps = [ 28 | "//riegeli/base:arithmetic", 29 | "//riegeli/records:record_position", 30 | "//riegeli/records:record_reader", 31 | "//riegeli/records:skipped_region", 32 | "//riegeli/tensorflow/io:file_reader", 33 | "@com_google_absl//absl/base:core_headers", 34 | "@com_google_absl//absl/status", 35 | "@com_google_absl//absl/strings:string_view", 36 | "@com_google_absl//absl/synchronization", 37 | "@local_config_tf//:libtensorflow_framework", 38 | "@local_config_tf//:tf_header_lib", 39 | ], 40 | ) 41 | 42 | py_test( 43 | name = "riegeli_dataset_test", 44 | srcs = ["kernel_tests/riegeli_dataset_test.py"], 45 | deps = [ 46 | ":riegeli_dataset_ops", 47 | "//python/riegeli", 48 | ], 49 | ) 50 | -------------------------------------------------------------------------------- /riegeli/lines/newline.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef RIEGELI_LINES_NEWLINE_H_ 16 | #define RIEGELI_LINES_NEWLINE_H_ 17 | 18 | #include "absl/strings/string_view.h" 19 | 20 | namespace riegeli { 21 | 22 | // Line terminator representations to recognize. 23 | enum class ReadNewline { 24 | kLf, // LF ("\n") 25 | kCrLfOrLf, // CR-LF | LF ("\r\n" | "\n") 26 | }; 27 | 28 | // Line terminator representation to write. 
29 | enum class WriteNewline { 30 | kLf, // LF ("\n") 31 | kCrLf, // CR-LF ("\r\n") 32 | 33 | #ifndef _WIN32 34 | kNative = kLf, 35 | #else 36 | kNative = kCrLf, 37 | #endif 38 | }; 39 | 40 | // Native line representation as a string. 41 | #ifndef _WIN32 42 | inline constexpr absl::string_view kNewline = "\n"; 43 | #else 44 | inline constexpr absl::string_view kNewline = "\r\n"; 45 | #endif 46 | 47 | // UTF-8 BOM representation as a string. 48 | inline constexpr absl::string_view kUtf8Bom = "\xef\xbb\xbf"; 49 | 50 | } // namespace riegeli 51 | 52 | #endif // RIEGELI_LINES_NEWLINE_H_ 53 | -------------------------------------------------------------------------------- /riegeli/records/tools/riegeli_summary.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package riegeli.summary; 4 | 5 | import "riegeli/records/records_metadata.proto"; 6 | 7 | // Summary of a Riegeli/records file contents. 8 | // 9 | // This is currently used merely to format the output of describe_riegeli_file 10 | // as structured data. 
11 | 12 | enum ChunkType { 13 | FILE_SIGNATURE = 0x73; 14 | FILE_METADATA = 0x6d; 15 | PADDING = 0x70; 16 | SIMPLE = 0x72; 17 | TRANSPOSED = 0x74; 18 | } 19 | 20 | enum CompressionType { 21 | NONE = 0; 22 | BROTLI = 0x62; 23 | ZSTD = 0x7a; 24 | SNAPPY = 0x73; 25 | } 26 | 27 | message SimpleChunk { 28 | optional CompressionType compression_type = 1; 29 | repeated uint64 record_sizes = 2 [packed = true]; 30 | repeated bytes records = 3; 31 | } 32 | 33 | message TransposedChunk { 34 | optional CompressionType compression_type = 1; 35 | repeated uint64 record_sizes = 2 [packed = true]; 36 | repeated bytes records = 3; 37 | } 38 | 39 | message Chunk { 40 | optional uint64 chunk_begin = 1; 41 | optional ChunkType chunk_type = 2; 42 | optional uint64 data_size = 3; 43 | optional uint64 num_records = 4; 44 | optional uint64 decoded_data_size = 5; 45 | oneof Data { 46 | riegeli.RecordsMetadata file_metadata_chunk = 6; 47 | SimpleChunk simple_chunk = 7; 48 | TransposedChunk transposed_chunk = 8; 49 | } 50 | } 51 | 52 | // This is not used because each chunk is printed on the fly, so that the output 53 | // appears incrementally. 54 | // 55 | // message File { 56 | // optional string filename = 1; 57 | // optional uint64 file_size = 2; 58 | // repeated Chunk chunk = 3; 59 | // } 60 | -------------------------------------------------------------------------------- /python/riegeli/records/skipped_region.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Details about a skipped region of invalid file contents.""" 15 | 16 | __all__ = ('SkippedRegion',) 17 | 18 | 19 | class SkippedRegion: 20 | """Details about a skipped region of invalid file contents. 21 | 22 | Attributes: 23 | begin: File position of the beginning of the skipped region, inclusive. 24 | end: File position of the end of the skipped region, exclusive. 25 | length: Length of the skipped region, in bytes. 26 | message: Message explaining why the region is invalid. 27 | """ 28 | 29 | __slots__ = ('begin', 'end', 'message') 30 | 31 | def __init__(self, begin, end, message): 32 | if begin > end: 33 | raise ValueError(f'Positions in the wrong order: {begin} > {end}') 34 | self.begin = begin 35 | self.end = end 36 | self.message = message 37 | 38 | @property 39 | def length(self): 40 | return self.end - self.begin 41 | 42 | def __str__(self): 43 | return f'[{self.begin}..{self.end}): {self.message}' 44 | 45 | def __repr__(self): 46 | return f'SkippedRegion({self.begin}, {self.end}, {self.message!r})' 47 | -------------------------------------------------------------------------------- /python/riegeli/py_extension.bzl: -------------------------------------------------------------------------------- 1 | """Supports writing Python modules in C++.""" 2 | 3 | load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library") 4 | load("@rules_python//python:defs.bzl", "py_library") 5 | 6 | def py_extension( 7 | name = None, 8 | srcs = None, 9 | hdrs = None, 10 | data = None, 11 | features = None, 12 | visibility = None, 13 | 
deps = None): 14 | """Creates a Python module implemented in C++. 15 | 16 | Python modules can depend on a py_extension. Other py_extensions can depend 17 | on a generated C++ library named with "_cc" suffix. 18 | 19 | Args: 20 | name: Name for this target. 21 | srcs: C++ source files. 22 | hdrs: C++ header files, for other py_extensions which depend on this. 23 | data: Files needed at runtime. This may include Python libraries. 24 | features: Passed to cc_library. 25 | visibility: Controls which rules can depend on this. 26 | deps: Other C++ libraries that this library depends upon. 27 | """ 28 | 29 | cc_library_name = name + "_cc" 30 | cc_binary_name = name + ".so" 31 | cc_library( 32 | name = cc_library_name, 33 | srcs = srcs, 34 | hdrs = hdrs, 35 | data = data, 36 | features = features, 37 | visibility = visibility, 38 | deps = deps, 39 | alwayslink = True, 40 | ) 41 | cc_binary( 42 | name = cc_binary_name, 43 | linkshared = True, 44 | linkstatic = True, 45 | visibility = ["//visibility:private"], 46 | deps = [cc_library_name], 47 | ) 48 | 49 | py_library( 50 | name = name, 51 | data = [cc_binary_name], 52 | visibility = visibility, 53 | ) 54 | -------------------------------------------------------------------------------- /riegeli/lz4/lz4_dictionary.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | // Enables the experimental lz4 API: 16 | // * `LZ4F_createCDict()` 17 | // * `LZ4F_freeCDict()` 18 | #define LZ4F_STATIC_LINKING_ONLY 19 | 20 | #include "riegeli/lz4/lz4_dictionary.h" 21 | 22 | #include 23 | 24 | #include "absl/base/attributes.h" 25 | #include "absl/base/call_once.h" 26 | #include "absl/strings/string_view.h" 27 | #include "lz4frame.h" 28 | #include "riegeli/base/shared_ptr.h" 29 |
namespace riegeli {

// Releases a compression dictionary with `LZ4F_freeCDict()`.
void Lz4Dictionary::Repr::LZ4F_CDictDeleter::operator()(LZ4F_CDict* ptr) const {
  LZ4F_freeCDict(ptr);
}

// Builds the compression dictionary from `data_` on the first call
// (guarded by `compression_once_`) and returns the stored pointer.
inline const LZ4F_CDict* Lz4Dictionary::Repr::PrepareCompressionDictionary()
    const {
  absl::call_once(compression_once_, [this] {
    compression_dictionary_.reset(LZ4F_createCDict(data_.data(), data_.size()));
  });
  return compression_dictionary_.get();
}

// Returns `nullptr` when there is no `repr_`, otherwise delegates to
// `Repr::PrepareCompressionDictionary()`.
const LZ4F_CDict* Lz4Dictionary::PrepareCompressionDictionary() const
    ABSL_ATTRIBUTE_LIFETIME_BOUND {
  return repr_ == nullptr ? nullptr : repr_->PrepareCompressionDictionary();
}

}  // namespace riegeli
51 | -------------------------------------------------------------------------------- /riegeli/digests/crc32_digester.h: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #ifndef RIEGELI_DIGESTS_CRC32_DIGESTER_H_ 16 | #define RIEGELI_DIGESTS_CRC32_DIGESTER_H_ 17 | 18 | #include 19 | 20 | #include "absl/strings/string_view.h" 21 | 22 | namespace riegeli { 23 | 24 | // A digester computing CRC32 checksums, for `DigestingReader` and 25 | // `DigestingWriter`. 26 | // 27 | // This uses the polynomial x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + 28 | // x^10 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1 (0x104c11db7). 29 | // 30 | // This polynomial is used e.g. by gzip, zip, and png: 31 | // https://en.wikipedia.org/wiki/Cyclic_redundancy_check#Polynomial_representations_of_cyclic_redundancy_checks 32 | class Crc32Digester { 33 | public: 34 | Crc32Digester() : Crc32Digester(0) {} 35 | 36 | explicit Crc32Digester(uint32_t seed); 37 | 38 | Crc32Digester(const Crc32Digester& that) = default; 39 | Crc32Digester& operator=(const Crc32Digester& that) = default; 40 | 41 | void Write(absl::string_view src); 42 | uint32_t Digest() { return crc_; } 43 | 44 | private: 45 | uint32_t crc_; 46 | }; 47 | 48 | } // namespace riegeli 49 | 50 | #endif // RIEGELI_DIGESTS_CRC32_DIGESTER_H_ 51 | -------------------------------------------------------------------------------- /configure: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2019 Google LLC 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | set -e 17 | 18 | PYTHON_BIN_PATH=`which python` 19 | 20 | if [[ $PYTHON_BIN_PATH ]] && $PYTHON_BIN_PATH -c "import tensorflow" &>/dev/null; then 21 | TF_CFLAGS=$($PYTHON_BIN_PATH -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))') 22 | TF_LFLAGS=$($PYTHON_BIN_PATH -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))') 23 | 24 | TF_HEADER_DIR=${TF_CFLAGS%% *} 25 | TF_HEADER_DIR=${TF_HEADER_DIR#-I} 26 | TF_SHARED_LIBRARY_DIR=${TF_LFLAGS%% *} 27 | TF_SHARED_LIBRARY_DIR=${TF_SHARED_LIBRARY_DIR#-L} 28 | TF_SHARED_LIBRARY_NAME=${TF_LFLAGS##* -l:} 29 | else 30 | TF_HEADER_DIR= 31 | TF_SHARED_LIBRARY_DIR= 32 | TF_SHARED_LIBRARY_NAME= 33 | fi 34 | 35 | { 36 | printf 'build --action_env PYTHON_BIN_PATH="%s"\n' "$PYTHON_BIN_PATH" 37 | printf 'build --action_env TF_HEADER_DIR="%s"\n' "$TF_HEADER_DIR" 38 | printf 'build --action_env TF_SHARED_LIBRARY_DIR="%s"\n' "$TF_SHARED_LIBRARY_DIR" 39 | printf 'build --action_env TF_SHARED_LIBRARY_NAME="%s"\n' "$TF_SHARED_LIBRARY_NAME" 40 | } >configure.bazelrc 41 | 42 | echo "Set up configure.bazelrc. Make sure to include it in your .bazelrc file." 43 | -------------------------------------------------------------------------------- /riegeli/base/stream_utils.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #include "riegeli/base/stream_utils.h" 16 | 17 | #include 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include "absl/base/nullability.h" 25 | 26 | ABSL_POINTERS_DEFAULT_NONNULL 27 | 28 | namespace riegeli { 29 | 30 | void WritePadding(std::ostream& dest, size_t length, char fill) { 31 | char buffer[64]; 32 | std::memset(buffer, fill, sizeof(buffer)); 33 | while (length > sizeof(buffer)) { 34 | dest.write(buffer, std::streamsize{sizeof(buffer)}); 35 | length -= sizeof(buffer); 36 | } 37 | dest.write(buffer, static_cast(length)); 38 | } 39 | 40 | int StringifyOStream::StringStreambuf::overflow(int src) { 41 | if (src != traits_type::eof()) dest_->push_back(static_cast(src)); 42 | return traits_type::not_eof(src); 43 | } 44 | 45 | std::streamsize StringifyOStream::StringStreambuf::xsputn( 46 | const char* absl_nullable src, std::streamsize length) { 47 | assert(length >= 0); 48 | dest_->append(src, static_cast(length)); 49 | return length; 50 | } 51 | 52 | } // namespace riegeli 53 | -------------------------------------------------------------------------------- /riegeli/base/closing_ptr.h: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #ifndef RIEGELI_BASE_CLOSING_PTR_H_ 16 | #define RIEGELI_BASE_CLOSING_PTR_H_ 17 | 18 | #include 19 | 20 | #include "absl/base/attributes.h" 21 | #include "absl/base/nullability.h" 22 | 23 | ABSL_POINTERS_DEFAULT_NONNULL 24 | 25 | namespace riegeli { 26 | 27 | // A deleter for `std::unique_ptr` which does nothing. 28 | struct NullDeleter { 29 | template 30 | void operator()(ABSL_ATTRIBUTE_UNUSED T* ptr) const {} 31 | }; 32 | 33 | // Marks the pointer with the intent to transfer the responsibility to close the 34 | // object when done with the pointer, even though the object is not moved nor 35 | // destroyed. 36 | // 37 | // In the context of `Dependency` and `Any`, passing `ClosingPtr(&m)` 38 | // instead of `std::move(m)` avoids moving `m`, but the caller must ensure that 39 | // the dependent object is valid while the host object needs it. 40 | 41 | template 42 | using ClosingPtrType = std::unique_ptr; 43 | 44 | template 45 | inline ClosingPtrType ClosingPtr(T* ptr) { 46 | return ClosingPtrType(ptr); 47 | } 48 | 49 | } // namespace riegeli 50 | 51 | #endif // RIEGELI_BASE_CLOSING_PTR_H_ 52 | -------------------------------------------------------------------------------- /riegeli/base/parallelism.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #ifndef RIEGELI_BASE_PARALLELISM_H_ 16 | #define RIEGELI_BASE_PARALLELISM_H_ 17 | 18 | #include 19 | 20 | #include 21 | 22 | #include "absl/base/attributes.h" 23 | #include "absl/base/thread_annotations.h" 24 | #include "absl/functional/any_invocable.h" 25 | #include "absl/synchronization/mutex.h" 26 | 27 | namespace riegeli::internal { 28 | 29 | // A thread pool with lazily created worker threads, without a thread count 30 | // limit. Worker threads exit after being idle for one minute. 31 | class ThreadPool { 32 | public: 33 | ThreadPool() {} 34 | 35 | ThreadPool(const ThreadPool&) = delete; 36 | ThreadPool& operator=(const ThreadPool&) = delete; 37 | 38 | ~ThreadPool(); 39 | 40 | static ThreadPool& global(); 41 | 42 | void Schedule(absl::AnyInvocable task); 43 | 44 | private: 45 | absl::Mutex mutex_; 46 | bool exiting_ ABSL_GUARDED_BY(mutex_) = false; 47 | size_t num_threads_ ABSL_GUARDED_BY(mutex_) = 0; 48 | size_t num_idle_threads_ ABSL_GUARDED_BY(mutex_) = 0; 49 | std::deque> tasks_ ABSL_GUARDED_BY(mutex_); 50 | }; 51 | 52 | } // namespace riegeli::internal 53 | 54 | #endif // RIEGELI_BASE_PARALLELISM_H_ 55 | -------------------------------------------------------------------------------- /python/riegeli/records/record_position.h: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #ifndef PYTHON_RIEGELI_RECORDS_RECORD_POSITION_H_ 16 | #define PYTHON_RIEGELI_RECORDS_RECORD_POSITION_H_ 17 | 18 | // From https://docs.python.org/3/c-api/intro.html: 19 | // Since Python may define some pre-processor definitions which affect the 20 | // standard headers on some systems, you must include Python.h before any 21 | // standard headers are included. 22 | #include 23 | // clang-format: do not reorder the above include. 24 | 25 | #include 26 | 27 | #include "python/riegeli/base/utils.h" 28 | #include "riegeli/records/record_position.h" 29 | 30 | namespace riegeli::python { 31 | 32 | // Access the API thus: 33 | // ``` 34 | // static constexpr ImportedCapsule kRecordPositionApi( 35 | // kRecordPositionCapsuleName); 36 | // ``` 37 | 38 | struct RecordPositionApi { 39 | PythonPtr (*RecordPositionToPython)(FutureRecordPosition value); 40 | std::optional (*RecordPositionFromPython)(PyObject* object); 41 | }; 42 | 43 | inline constexpr const char* kRecordPositionCapsuleName = 44 | "riegeli.records.record_position._CPPAPI"; 45 | 46 | } // namespace riegeli::python 47 | 48 | #endif // PYTHON_RIEGELI_RECORDS_RECORD_POSITION_H_ 49 | -------------------------------------------------------------------------------- /riegeli/base/ownership.h: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
namespace riegeli {

// `PassOwnership` and `ShareOwnership` type tags specify how ownership of a
// potentially shared object is transferred, for cases when this is not implied
// by parameter types.
//
//  * `PassOwnership`: the original owner drops its reference. The reference
//    count is decreased unless the new owner gets a reference instead.
//
//  * `ShareOwnership`: The original owner keeps its reference. The reference
//    count is increased if the new owner also gets a reference.

struct PassOwnership {};
inline constexpr PassOwnership kPassOwnership = {};

struct ShareOwnership {};
inline constexpr ShareOwnership kShareOwnership = {};

// `IsOwnership<T>::value` is `true` if `T` is `PassOwnership` or
// `ShareOwnership`.

// Primary template: an arbitrary type is not an ownership tag.
template <typename T>
struct IsOwnership : std::false_type {};

template <>
struct IsOwnership<PassOwnership> : std::true_type {};

template <>
struct IsOwnership<ShareOwnership> : std::true_type {};

}  // namespace riegeli
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef RIEGELI_CHUNK_ENCODING_BROTLI_ENCODER_SELECTION_H_ 16 | #define RIEGELI_CHUNK_ENCODING_BROTLI_ENCODER_SELECTION_H_ 17 | 18 | #include 19 | 20 | #include "riegeli/base/chain.h" 21 | #include "riegeli/base/recycling_pool.h" 22 | #include "riegeli/bytes/writer.h" 23 | #include "riegeli/chunk_encoding/compressor_options.h" 24 | 25 | namespace riegeli::chunk_encoding_internal { 26 | 27 | // Creates a `Writer` which compresses data with Brotli and writes them to 28 | // `compressed`. 29 | // 30 | // The encoder implementation is determined by 31 | // `compressor_options.brotli_encoder()`. 32 | // 33 | // This is a weak function. Its default definition supports only C Brotli. 34 | // It can be overridden to support also Rust Brotli. 35 | std::unique_ptr NewBrotliWriter( 36 | Chain* compressed, const CompressorOptions& compressor_options, 37 | const RecyclingPoolOptions& recycling_pool_options); 38 | 39 | // Support for `NewBrotliWriter()`: uses C Brotli, ignores 40 | // `compressor_options.brotli_encoder()`. 
load("@rules_cc//cc:defs.bzl", "cc_library")

# All targets in this package are publicly visible and built with the
# "header_modules" feature.
package(
    default_visibility = ["//visibility:public"],
    features = ["header_modules"],
)

licenses(["notice"])

# Library built from hadoop_snappy_reader.{cc,h}.
cc_library(
    name = "hadoop_snappy_reader",
    srcs = ["hadoop_snappy_reader.cc"],
    hdrs = ["hadoop_snappy_reader.h"],
    deps = [
        "//riegeli/base:arithmetic",
        "//riegeli/base:assert",
        "//riegeli/base:buffer",
        "//riegeli/base:dependency",
        "//riegeli/base:initializer",
        "//riegeli/base:object",
        "//riegeli/base:status",
        "//riegeli/base:types",
        "//riegeli/bytes:pullable_reader",
        "//riegeli/bytes:reader",
        "//riegeli/endian:endian_reading",
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:string_view",
        "@snappy",
    ],
)

# Library built from hadoop_snappy_writer.{cc,h}. Note that it depends on the
# reader target of this package.
cc_library(
    name = "hadoop_snappy_writer",
    srcs = ["hadoop_snappy_writer.cc"],
    hdrs = ["hadoop_snappy_writer.h"],
    deps = [
        ":hadoop_snappy_reader",
        "//riegeli/base:arithmetic",
        "//riegeli/base:assert",
        "//riegeli/base:buffer",
        "//riegeli/base:buffering",
        "//riegeli/base:dependency",
        "//riegeli/base:initializer",
        "//riegeli/base:object",
        "//riegeli/base:status",
        "//riegeli/base:types",
        "//riegeli/bytes:pushable_writer",
        "//riegeli/bytes:reader",
        "//riegeli/bytes:writer",
        "//riegeli/endian:endian_writing",
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings",
        "@snappy",
    ],
)
// `HasClose<T>::value` is `true` if an lvalue of type `T` has a `close()`
// member function callable without arguments.
//
// Primary template: chosen when the specialization below is not viable.
template <typename T, typename Enable = void>
struct HasClose : std::false_type {};

// Chosen when the expression `std::declval<T&>().close()` is well-formed.
template <typename T>
struct HasClose<T, std::void_t<decltype(std::declval<T&>().close())>>
    : std::true_type {};

// Calls `stream.close()` if `Stream` defines it, otherwise does nothing.
template <typename Stream>
inline void Close(Stream& stream) {
  if constexpr (HasClose<Stream>::value) {
    stream.close();
  }
}

// Returns `stream` converted to `std::istream*` if `T` is derived from
// `std::istream` (or is `std::istream` itself), otherwise returns `nullptr`.
template <typename T>
inline std::istream* DetectIStream(T* stream) {
  if constexpr (std::is_base_of_v<std::istream, T>) {
    return stream;
  } else {
    return nullptr;
  }
}
14 | 15 | #include "riegeli/chunk_encoding/chunk_encoder.h" 16 | 17 | #include 18 | 19 | #include "absl/base/optimization.h" 20 | #include "absl/status/status.h" 21 | #include "absl/strings/cord.h" 22 | #include "google/protobuf/message_lite.h" 23 | #include "riegeli/base/chain.h" 24 | #include "riegeli/messages/serialize_message.h" 25 | 26 | namespace riegeli { 27 | 28 | void ChunkEncoder::Done() { 29 | num_records_ = 0; 30 | decoded_data_size_ = 0; 31 | } 32 | 33 | bool ChunkEncoder::AddRecord(const google::protobuf::MessageLite& record, 34 | SerializeMessageOptions serialize_options) { 35 | if (ABSL_PREDICT_FALSE(!ok())) return false; 36 | Chain serialized; 37 | if (absl::Status status = 38 | SerializeMessage(record, serialized, serialize_options); 39 | ABSL_PREDICT_FALSE(!status.ok())) { 40 | return Fail(std::move(status)); 41 | } 42 | return AddRecord(std::move(serialized)); 43 | } 44 | 45 | bool ChunkEncoder::AddRecord(Chain&& record) { 46 | // Not `std::move(record)`: forward to `AddRecord(const Chain&)`. 47 | return AddRecord(record); 48 | } 49 | 50 | bool ChunkEncoder::AddRecord(absl::Cord&& record) { 51 | // Not `std::move(record)`: forward to `AddRecord(const absl::Cord&)`. 52 | return AddRecord(record); 53 | } 54 | 55 | } // namespace riegeli 56 | -------------------------------------------------------------------------------- /riegeli/records/tools/tfrecord_recognizer.h: -------------------------------------------------------------------------------- 1 | #include "absl/base/attributes.h" 2 | // Copyright 2018 Google LLC 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 
namespace riegeli {

// Checks whether the data available from a `Reader` looks like a TFRecord
// file. Failures are reported through the `Object` base class (`ok()`).
class TFRecordRecognizer : public Object {
 public:
  // Stores `byte_reader`, which must not be null (this is asserted). The
  // lifetime-bound annotation signals that `*byte_reader` is expected to
  // outlive this object.
  explicit TFRecordRecognizer(
      Reader* byte_reader ABSL_ATTRIBUTE_LIFETIME_BOUND);

  // Ensures that the file looks like a valid TFRecord file.
  //
  // Updates `record_reader_options` on success.
  //
  // Return values:
  //  * `true`                  - success (`record_reader_options` is updated)
  //  * `false` (when `ok()`)   - source ends
  //  * `false` (when `!ok()`)  - failure
  bool CheckFileFormat(
      tensorflow::io::RecordReaderOptions& record_reader_options);

 private:
  // The source being examined; never null after construction.
  Reader* byte_reader_;
};

// Implementation details follow.

inline TFRecordRecognizer::TFRecordRecognizer(
    Reader* byte_reader ABSL_ATTRIBUTE_LIFETIME_BOUND)
    : byte_reader_(RIEGELI_EVAL_ASSERT_NOTNULL(byte_reader)) {}

}  // namespace riegeli
14 | 15 | #include "riegeli/brotli/brotli_dictionary.h" 16 | 17 | #include 18 | #include 19 | 20 | #include 21 | 22 | #include "absl/base/attributes.h" 23 | #include "absl/base/call_once.h" 24 | #include "absl/strings/string_view.h" 25 | #include "brotli/encode.h" 26 | #include "brotli/shared_dictionary.h" 27 | #include "riegeli/base/assert.h" 28 | 29 | namespace riegeli { 30 | 31 | const BrotliEncoderPreparedDictionary* 32 | BrotliDictionary::Chunk::PrepareCompressionDictionary() const 33 | ABSL_ATTRIBUTE_LIFETIME_BOUND { 34 | absl::call_once(compression_once_, [&] { 35 | if (type_ == Type::kNative) { 36 | RIEGELI_ASSERT_NE(compression_dictionary_, nullptr) 37 | << "Failed invariant of BrotliDictionary::Chunk: " 38 | "unprepared native chunk"; 39 | return; 40 | } 41 | owned_compression_dictionary_.reset(BrotliEncoderPrepareDictionary( 42 | static_cast(type_), data_.size(), 43 | reinterpret_cast(data_.data()), BROTLI_MAX_QUALITY, 44 | // `BrotliAllocator` is not supported here because the prepared 45 | // dictionary may easily outlive the allocator. 46 | nullptr, nullptr, nullptr)); 47 | compression_dictionary_ = owned_compression_dictionary_.get(); 48 | }); 49 | return compression_dictionary_; 50 | } 51 | 52 | } // namespace riegeli 53 | -------------------------------------------------------------------------------- /riegeli/base/assert.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "riegeli/base/assert.h" 16 | 17 | #include 18 | 19 | #include "absl/base/nullability.h" 20 | #include "absl/log/absl_log.h" 21 | #include "riegeli/base/stream_utils.h" 22 | 23 | ABSL_POINTERS_DEFAULT_NONNULL 24 | 25 | namespace riegeli::assert_internal { 26 | 27 | CheckResult::CheckResult(const char* function, const char* prefix) 28 | : header_(new StringOStream(new std::string())) { 29 | header() << "Check failed in " << function << ": " << prefix; 30 | } 31 | 32 | CheckFailed::CheckFailed(const char* file, int line, CheckResult check_result) 33 | : file_(file), 34 | line_(line), 35 | check_result_(check_result), 36 | details_(new StringOStream(new std::string())) {} 37 | 38 | CheckFailed::~CheckFailed() { 39 | if (!details_->dest()->empty()) { 40 | check_result_.header() << "; " << *details_->dest(); 41 | } 42 | ABSL_LOG(FATAL).AtLocation(file_, line_) << *check_result_.header().dest(); 43 | } 44 | 45 | void CheckNotNullFailed(const char* file, int line, const char* function, 46 | const char* expression) { 47 | CheckResult check_result(function, expression); 48 | check_result.header() << " != nullptr"; 49 | CheckFailed check_failed(file, line, check_result); 50 | } 51 | 52 | CheckResult CheckImpossibleResult(const char* function) { 53 | return CheckResult(function, "Impossible"); 54 | } 55 | 56 | } // namespace riegeli::assert_internal 57 | -------------------------------------------------------------------------------- /riegeli/base/estimated_allocated_size.h: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef RIEGELI_BASE_ESTIMATED_ALLOCATED_SIZE_H_ 16 | #define RIEGELI_BASE_ESTIMATED_ALLOCATED_SIZE_H_ 17 | 18 | #include 19 | 20 | #include "absl/base/attributes.h" 21 | #include "absl/base/nullability.h" 22 | #include "riegeli/base/arithmetic.h" 23 | 24 | ABSL_POINTERS_DEFAULT_NONNULL 25 | 26 | namespace riegeli { 27 | 28 | // Returns the estimated size which will be allocated when requesting to 29 | // allocate `requested_size`. 30 | inline size_t EstimatedAllocatedSize(size_t requested_size) { 31 | // Placeholder for asking the memory manager, which might be possible on some 32 | // platforms. 33 | return RoundUp<2 * sizeof(void*)>( 34 | UnsignedMax(requested_size, 4 * sizeof(void*))); 35 | } 36 | 37 | // Returns the estimated size which was allocated at `ptr` when requested to 38 | // allocate `requested_size`. 39 | inline size_t EstimatedAllocatedSize(ABSL_ATTRIBUTE_UNUSED const void* ptr, 40 | size_t requested_size) { 41 | // Placeholder for using `ptr`, which might be possible on some platforms. 42 | return EstimatedAllocatedSize(requested_size); 43 | } 44 | 45 | // A deterministic variant of `EstimatedAllocatedSize()`, useful for testing. 
// Variant of `EstimatedAllocatedSize()` with a fixed, platform-independent
// result: the requested size plus 16 bytes. Useful for testing.
inline size_t EstimatedAllocatedSizeForTesting(size_t requested_size) {
  constexpr size_t kAssumedOverhead = 16;
  return kAssumedOverhead + requested_size;
}
18 | #if !defined(_XOPEN_SOURCE) || _XOPEN_SOURCE < 700 19 | #undef _XOPEN_SOURCE 20 | #define _XOPEN_SOURCE 700 21 | #endif 22 | 23 | #endif 24 | 25 | #include "riegeli/bytes/fd_internal.h" 26 | 27 | #ifdef __APPLE__ 28 | #include 29 | #endif 30 | #ifndef _WIN32 31 | #include 32 | #include 33 | #endif 34 | 35 | #include 36 | 37 | #ifndef _WIN32 38 | #include "absl/base/optimization.h" 39 | #endif 40 | #include "absl/strings/str_cat.h" 41 | #ifndef _WIN32 42 | #include "riegeli/base/arithmetic.h" 43 | #include "riegeli/base/buffer.h" 44 | #endif 45 | 46 | namespace riegeli::fd_internal { 47 | 48 | std::string FilenameForFd(int fd) { 49 | #ifndef _WIN32 50 | std::string filename = absl::StrCat("/proc/self/fd/", fd); 51 | Buffer buffer(PATH_MAX); 52 | const ssize_t length = readlink(filename.c_str(), buffer.data(), PATH_MAX); 53 | if (ABSL_PREDICT_TRUE(length >= 0)) { 54 | filename.assign(buffer.data(), IntCast(length)); 55 | } 56 | return filename; 57 | #else // _WIN32 58 | return absl::StrCat(""); 59 | #endif // _WIN32 60 | } 61 | 62 | #ifdef __APPLE__ 63 | // On Darwin `O_CLOEXEC` is available conditionally, so `kCloseOnExec` is 64 | // defined out of line. 65 | extern const int kCloseOnExec = O_CLOEXEC; 66 | #endif // __APPLE__ 67 | 68 | } // namespace riegeli::fd_internal 69 | -------------------------------------------------------------------------------- /riegeli/gcs/gcs_internal.h: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef RIEGELI_GCS_GCS_INTERNAL_H_ 16 | #define RIEGELI_GCS_GCS_INTERNAL_H_ 17 | 18 | #include "absl/base/attributes.h" 19 | #include "absl/status/status.h" 20 | #include "google/cloud/status.h" 21 | 22 | namespace riegeli::gcs_internal { 23 | 24 | inline absl::Status FromCloudStatus(const google::cloud::Status& status) { 25 | return absl::Status(static_cast(status.code()), 26 | status.message()); 27 | } 28 | 29 | template 30 | T GetOption() { 31 | return T(); 32 | } 33 | template 34 | const T& GetOption(const T& option) { 35 | return option; 36 | } 37 | template 38 | const T& GetOption(ABSL_ATTRIBUTE_UNUSED const T& previous_option, 39 | const T& option, const Options&... options) { 40 | return GetOption(option, options...); 41 | } 42 | template 43 | const T& GetOption(const T& option, 44 | ABSL_ATTRIBUTE_UNUSED const Other& other_option, 45 | const Options&... options) { 46 | return GetOption(option, options...); 47 | } 48 | template 49 | auto GetOption(ABSL_ATTRIBUTE_UNUSED const Other& other_option, 50 | const Options&... 
namespace riegeli {

// Low level functions for writing and reading serialized proto messages
// directly.
//
// They mostly correspond to selected members of
// `google::protobuf::internal::WireFormatLite`.

// The part of a field tag which denotes the representation of the field value
// which follows the tag.
enum class WireType : uint32_t {
  kVarint = 0,
  kFixed32 = 5,
  kFixed64 = 1,
  kLengthDelimited = 2,
  kStartGroup = 3,
  kEndGroup = 4,
  kInvalid6 = 6,
  kInvalid7 = 7,
};

// Composes/decomposes a field tag.
constexpr uint32_t MakeTag(int field_number, WireType wire_type);
constexpr WireType GetTagWireType(uint32_t tag);
constexpr int GetTagFieldNumber(uint32_t tag);

// Implementation details follow.

// A tag holds the wire type in its 3 lowest bits and the field number in the
// remaining higher bits.
constexpr uint32_t MakeTag(int field_number, WireType wire_type) {
  return (static_cast<uint32_t>(field_number) << 3) |
         static_cast<uint32_t>(wire_type);
}

// Extracts the 3 lowest bits of `tag`.
constexpr WireType GetTagWireType(uint32_t tag) {
  return static_cast<WireType>(tag & 7);
}

// Extracts the bits of `tag` above the wire type.
constexpr int GetTagFieldNumber(uint32_t tag) {
  return static_cast<int>(tag >> 3);
}

}  // namespace riegeli
// Supports `ExternalRef`.
//
// Moves the storage out of `*self` and returns it.
inline ExternalStorage RiegeliToExternalStorage(ExternalStorage* self) {
  return std::move(*self);
}

// Type-erased external object with its deleter and a substring of a byte array
// it owns.
struct ExternalData {
  // Converts to a view of `substr`; the view does not own the bytes.
  /*implicit*/ operator absl::string_view() const { return substr; }

  // Indicates support for:
  //  * `ExternalRef(ExternalData&&)`
  //  * `ExternalRef(ExternalData&&, substr)`
  friend void RiegeliSupportsExternalRef(ExternalData*) {}

  // Supports `ExternalRef`.
  //
  // Moves `self->storage` out and returns it; `self->substr` is left as is.
  friend ExternalStorage RiegeliToExternalStorage(ExternalData* self) {
    return std::move(self->storage);
  }

  ExternalStorage storage;  // Must outlive usages of `substr`.
  absl::string_view substr;
};

// Creates `ExternalData` holding a copy of `data`.
ExternalData ExternalDataCopy(absl::string_view data);
# Library built from python_writer.{cc,h}.
cc_library(
    name = "python_writer",
    srcs = ["python_writer.cc"],
    hdrs = ["python_writer.h"],
    # python_writer.cc has #define before #include to influence what the
    # included files provide. For that reason the "use_header_modules" feature
    # is switched off for this target.
    features = ["-use_header_modules"],
    deps = [
        "//python/riegeli/base:utils",
        "//riegeli/base:arithmetic",
        "//riegeli/base:assert",
        "//riegeli/base:global",
        "//riegeli/base:object",
        "//riegeli/base:types",
        "//riegeli/bytes:buffer_options",
        "//riegeli/bytes:buffered_writer",
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/numeric:bits",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:string_view",
        "@rules_python//python/cc:current_py_cc_headers",
    ],
)
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Simple example which writes and reads a Riegeli/records file."""

import io
import os
import tempfile

import riegeli
from riegeli.records.tests import records_test_pb2


def sample_string(i, size):
  """Returns exactly `size` bytes built by repeating the text of `i`.

  The repeated unit is the decimal text of `i` followed by a single space,
  encoded to bytes.

  Args:
    i: Value whose formatted text is repeated.
    size: Length of the returned bytes object.

  Returns:
    A bytes object of length `size`.
  """
  piece = f'{i} '.encode()
  # `-(-a // b)` is ceiling division: repeat often enough to cover `size`,
  # then cut off the excess.
  result = piece * -(-size // len(piece))  # len(result) >= size
  return result[:size]


def sample_message(i, size):
  """Returns a `SimpleMessage` with `id=i` and a `size`-byte payload."""
  return records_test_pb2.SimpleMessage(id=i, payload=sample_string(i, size))


def write_records(filename):
  """Writes 100 sample messages to a Riegeli/records file at `filename`.

  The record type `SimpleMessage` is stored in the file metadata, and the
  writer is opened with the 'transpose' option.

  Args:
    filename: Path of the file to create or overwrite.
  """
  print('Writing', filename)
  metadata = riegeli.RecordsMetadata()
  riegeli.set_record_type(metadata, records_test_pb2.SimpleMessage)
  with riegeli.RecordWriter(
      io.FileIO(filename, mode='wb'), options='transpose', metadata=metadata
  ) as writer:
    writer.write_messages(sample_message(i, 100) for i in range(100))


def read_records(filename):
  """Reads the file at `filename` and prints the `id` of every record.

  The reader is given a field projection naming only the `id` field of
  `SimpleMessage` (presumably so that the payload need not be decoded --
  confirm against the `RecordReader` documentation).

  Args:
    filename: Path of the Riegeli/records file to read.
  """
  print('Reading', filename)
  with riegeli.RecordReader(
      io.FileIO(filename, mode='rb'),
      field_projection=[[
          records_test_pb2.SimpleMessage.DESCRIPTOR.fields_by_name['id'].number
      ]],
  ) as reader:
    print(
        ' '.join(
            str(record.id)
            for record in reader.read_messages(records_test_pb2.SimpleMessage)
        )
    )


def main():
  """Writes the sample file into the temporary directory and reads it back."""
  # Derive the location from the platform's temporary directory rather than
  # hard-coding '/tmp', which does not exist on Windows. On a POSIX system
  # with default settings this is still '/tmp/riegeli_example'.
  filename = os.path.join(tempfile.gettempdir(), 'riegeli_example')
  write_records(filename)
  read_records(filename)


if __name__ == '__main__':
  main()
-------------------------------------------------------------------------------- 1 | load("@rules_cc//cc:defs.bzl", "cc_library") 2 | 3 | package( 4 | default_visibility = ["//visibility:public"], 5 | features = ["header_modules"], 6 | ) 7 | 8 | licenses(["notice"]) 9 | 10 | cc_library( 11 | name = "framed_snappy_reader", 12 | srcs = ["framed_snappy_reader.cc"], 13 | hdrs = ["framed_snappy_reader.h"], 14 | deps = [ 15 | "//riegeli/base:arithmetic", 16 | "//riegeli/base:assert", 17 | "//riegeli/base:buffer", 18 | "//riegeli/base:dependency", 19 | "//riegeli/base:initializer", 20 | "//riegeli/base:moving_dependency", 21 | "//riegeli/base:object", 22 | "//riegeli/base:status", 23 | "//riegeli/base:types", 24 | "//riegeli/bytes:pullable_reader", 25 | "//riegeli/bytes:reader", 26 | "//riegeli/endian:endian_reading", 27 | "@com_google_absl//absl/base:core_headers", 28 | "@com_google_absl//absl/crc:crc32c", 29 | "@com_google_absl//absl/status", 30 | "@com_google_absl//absl/strings", 31 | "@com_google_absl//absl/strings:string_view", 32 | "@snappy", 33 | ], 34 | ) 35 | 36 | cc_library( 37 | name = "framed_snappy_writer", 38 | srcs = ["framed_snappy_writer.cc"], 39 | hdrs = ["framed_snappy_writer.h"], 40 | deps = [ 41 | ":framed_snappy_reader", 42 | "//riegeli/base:arithmetic", 43 | "//riegeli/base:assert", 44 | "//riegeli/base:buffer", 45 | "//riegeli/base:buffering", 46 | "//riegeli/base:dependency", 47 | "//riegeli/base:initializer", 48 | "//riegeli/base:object", 49 | "//riegeli/base:status", 50 | "//riegeli/base:types", 51 | "//riegeli/bytes:pushable_writer", 52 | "//riegeli/bytes:reader", 53 | "//riegeli/bytes:writer", 54 | "//riegeli/digests:crc32c_digester", 55 | "//riegeli/digests:digesting_writer", 56 | "//riegeli/endian:endian_writing", 57 | "@com_google_absl//absl/base:core_headers", 58 | "@com_google_absl//absl/status", 59 | "@com_google_absl//absl/strings", 60 | "@com_google_absl//absl/strings:string_view", 61 | "@snappy", 62 | ], 63 | ) 64 | 
-------------------------------------------------------------------------------- /riegeli/digests/openssl_digester.h: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef RIEGELI_DIGESTS_OPENSSL_DIGESTER_H_ 16 | #define RIEGELI_DIGESTS_OPENSSL_DIGESTER_H_ 17 | 18 | #include 19 | #include 20 | 21 | #include 22 | 23 | #include "absl/base/attributes.h" 24 | #include "absl/strings/string_view.h" 25 | 26 | namespace riegeli { 27 | 28 | // A digester template computing checksums implemented by OpenSSL, for 29 | // `DigestingReader` and `DigestingWriter`. 
30 | template 32 | class OpenSslDigester { 33 | public: 34 | OpenSslDigester() { init(&ctx_); } 35 | 36 | OpenSslDigester(const OpenSslDigester& that) = default; 37 | OpenSslDigester& operator=(const OpenSslDigester& that) = default; 38 | 39 | ABSL_ATTRIBUTE_REINITIALIZES void Reset() { 40 | init(&ctx_); 41 | is_open_ = true; 42 | } 43 | 44 | void Write(absl::string_view src) { update(&ctx_, src.data(), src.size()); } 45 | 46 | void Close() { 47 | if (is_open_) { 48 | final(reinterpret_cast(digest_.data()), &ctx_); 49 | is_open_ = false; 50 | } 51 | } 52 | 53 | std::array Digest() { 54 | if (is_open_) { 55 | H copy = ctx_; 56 | final(reinterpret_cast(digest_.data()), ©); 57 | } 58 | return digest_; 59 | } 60 | 61 | private: 62 | H ctx_; 63 | std::array digest_; 64 | bool is_open_ = true; 65 | }; 66 | 67 | } // namespace riegeli 68 | 69 | #endif // RIEGELI_DIGESTS_OPENSSL_DIGESTER_H_ 70 | -------------------------------------------------------------------------------- /MODULE.bazel: -------------------------------------------------------------------------------- 1 | module( 2 | name = "riegeli", 3 | repo_name = "com_google_riegeli", 4 | ) 5 | 6 | bazel_dep( 7 | name = "abseil-cpp", 8 | version = "20250814.0", 9 | repo_name = "com_google_absl", 10 | ) 11 | bazel_dep( 12 | name = "abseil-py", 13 | version = "2.1.0", 14 | repo_name = "absl_py", 15 | ) 16 | bazel_dep( 17 | name = "bazel_skylib", 18 | version = "1.7.1", 19 | ) 20 | bazel_dep( 21 | name = "boringssl", 22 | version = "0.0.0-20240530-2db0eb3", 23 | ) 24 | bazel_dep( 25 | name = "brotli", 26 | version = "1.1.0", 27 | repo_name = "org_brotli", 28 | ) 29 | bazel_dep( 30 | name = "bzip2", 31 | version = "1.0.8", 32 | ) 33 | bazel_dep( 34 | name = "highwayhash", 35 | version = "0.0.0-20240305-5ad3bf8", 36 | ) 37 | bazel_dep( 38 | name = "lz4", 39 | version = "1.9.4", 40 | ) 41 | bazel_dep( 42 | name = "platforms", 43 | version = "0.0.9", 44 | ) 45 | bazel_dep( 46 | name = "protobuf", 47 | version = "28.3", 48 | 
repo_name = "com_google_protobuf", 49 | ) 50 | bazel_dep( 51 | name = "rules_cc", 52 | version = "0.1.2", 53 | ) 54 | bazel_dep( 55 | name = "rules_python", 56 | version = "0.36.0", 57 | ) 58 | bazel_dep( 59 | name = "snappy", 60 | version = "1.2.0", 61 | ) 62 | bazel_dep( 63 | name = "xz", 64 | version = "5.4.5.bcr.1", 65 | ) 66 | bazel_dep( 67 | name = "zlib", 68 | version = "1.3.1.bcr.3", 69 | ) 70 | bazel_dep( 71 | name = "zstd", 72 | version = "1.5.6", 73 | repo_name = "net_zstd", 74 | ) 75 | bazel_dep( 76 | name = "google_cloud_cpp", 77 | version = "3.0.0-rc1", 78 | ) 79 | 80 | # Configure hermetic Python toolchain 81 | SUPPORTED_PYTHON_VERSIONS = [ 82 | "3.8", 83 | "3.9", 84 | "3.10", 85 | "3.11", 86 | "3.12", 87 | ] 88 | 89 | DEFAULT_PYTHON_VERSION = SUPPORTED_PYTHON_VERSIONS[-1] 90 | 91 | python = use_extension("@rules_python//python/extensions:python.bzl", "python") 92 | 93 | [ 94 | python.toolchain( 95 | is_default = version == DEFAULT_PYTHON_VERSION, 96 | python_version = version, 97 | ) 98 | for version in SUPPORTED_PYTHON_VERSIONS 99 | ] 100 | -------------------------------------------------------------------------------- /riegeli/bzip2/BUILD: -------------------------------------------------------------------------------- 1 | load("@rules_cc//cc:defs.bzl", "cc_library") 2 | 3 | package( 4 | default_visibility = ["//visibility:public"], 5 | features = ["header_modules"], 6 | ) 7 | 8 | licenses(["notice"]) 9 | 10 | cc_library( 11 | name = "bzip2_reader", 12 | srcs = ["bzip2_reader.cc"], 13 | hdrs = ["bzip2_reader.h"], 14 | deps = [ 15 | ":bzip2_error", 16 | "//riegeli/base:arithmetic", 17 | "//riegeli/base:assert", 18 | "//riegeli/base:dependency", 19 | "//riegeli/base:initializer", 20 | "//riegeli/base:object", 21 | "//riegeli/base:status", 22 | "//riegeli/base:types", 23 | "//riegeli/bytes:buffer_options", 24 | "//riegeli/bytes:buffered_reader", 25 | "//riegeli/bytes:reader", 26 | "@bzip2//:bz2", 27 | "@com_google_absl//absl/base:core_headers", 28 | 
"@com_google_absl//absl/status", 29 | "@com_google_absl//absl/strings", 30 | "@com_google_absl//absl/strings:string_view", 31 | ], 32 | ) 33 | 34 | cc_library( 35 | name = "bzip2_writer", 36 | srcs = ["bzip2_writer.cc"], 37 | hdrs = ["bzip2_writer.h"], 38 | deps = [ 39 | ":bzip2_error", 40 | "//riegeli/base:arithmetic", 41 | "//riegeli/base:assert", 42 | "//riegeli/base:dependency", 43 | "//riegeli/base:initializer", 44 | "//riegeli/base:object", 45 | "//riegeli/base:status", 46 | "//riegeli/base:types", 47 | "//riegeli/bytes:buffer_options", 48 | "//riegeli/bytes:buffered_writer", 49 | "//riegeli/bytes:writer", 50 | "@bzip2//:bz2", 51 | "@com_google_absl//absl/base:core_headers", 52 | "@com_google_absl//absl/status", 53 | "@com_google_absl//absl/strings", 54 | "@com_google_absl//absl/strings:string_view", 55 | ], 56 | ) 57 | 58 | cc_library( 59 | name = "bzip2_error", 60 | srcs = ["bzip2_error.cc"], 61 | hdrs = ["bzip2_error.h"], 62 | visibility = ["//visibility:private"], 63 | deps = [ 64 | "//riegeli/base:assert", 65 | "@bzip2//:bz2", 66 | "@com_google_absl//absl/status", 67 | "@com_google_absl//absl/strings", 68 | "@com_google_absl//absl/strings:string_view", 69 | ], 70 | ) 71 | -------------------------------------------------------------------------------- /riegeli/bytes/string_reader.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "riegeli/bytes/string_reader.h" 16 | 17 | #include 18 | 19 | #include 20 | #include 21 | 22 | #include "absl/base/optimization.h" 23 | #include "riegeli/base/assert.h" 24 | #include "riegeli/base/types.h" 25 | #include "riegeli/bytes/reader.h" 26 | 27 | namespace riegeli { 28 | 29 | bool StringReaderBase::PullSlow(size_t min_length, size_t recommended_length) { 30 | RIEGELI_ASSERT_LT(available(), min_length) 31 | << "Failed precondition of Reader::PullSlow(): " 32 | "enough data available, use Pull() instead"; 33 | return false; 34 | } 35 | 36 | bool StringReaderBase::SeekSlow(Position new_pos) { 37 | RIEGELI_ASSERT(new_pos < start_pos() || new_pos > limit_pos()) 38 | << "Failed precondition of Reader::SeekSlow(): " 39 | "position in the buffer, use Seek() instead"; 40 | if (ABSL_PREDICT_FALSE(!ok())) return false; 41 | RIEGELI_ASSERT_EQ(start_pos(), 0u) 42 | << "Failed invariant of StringReader: non-zero position of buffer start"; 43 | // Seeking forwards. Source ends. 44 | set_cursor(limit()); 45 | return false; 46 | } 47 | 48 | std::optional StringReaderBase::SizeImpl() { 49 | if (ABSL_PREDICT_FALSE(!ok())) return std::nullopt; 50 | return limit_pos(); 51 | } 52 | 53 | std::unique_ptr StringReaderBase::NewReaderImpl(Position initial_pos) { 54 | if (ABSL_PREDICT_FALSE(!ok())) return nullptr; 55 | // `NewReaderImpl()` is thread-safe from this point. 
56 | std::unique_ptr reader = 57 | std::make_unique>(start(), start_to_limit()); 58 | reader->Seek(initial_pos); 59 | return reader; 60 | } 61 | 62 | } // namespace riegeli 63 | -------------------------------------------------------------------------------- /riegeli/chunk_encoding/hash.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #include "riegeli/chunk_encoding/hash.h" 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include "absl/container/inlined_vector.h" 22 | #include "absl/strings/string_view.h" 23 | #include "highwayhash/hh_types.h" 24 | #include "highwayhash/highwayhash_target.h" 25 | #include "highwayhash/instruction_sets.h" 26 | #include "riegeli/base/chain.h" 27 | 28 | namespace riegeli::chunk_encoding_internal { 29 | 30 | namespace { 31 | 32 | alignas(32) const highwayhash::HHKey kHashKey = { 33 | 0x2f696c6567656952, // 'Riegeli/' 34 | 0x0a7364726f636572, // 'records\n' 35 | 0x2f696c6567656952, // 'Riegeli/' 36 | 0x0a7364726f636572, // 'records\n' 37 | }; 38 | 39 | } // namespace 40 | 41 | uint64_t Hash(absl::string_view data) { 42 | highwayhash::HHResult64 result; 43 | highwayhash::InstructionSets::Run( 44 | kHashKey, data.data(), data.size(), &result); 45 | return result; 46 | } 47 | 48 | uint64_t Hash(const Chain& data) { 49 | if (const std::optional flat = data.TryFlat(); 50 | flat != std::nullopt) { 51 | return Hash(*flat); 52 | } 53 | absl::InlinedVector fragments; 54 | fragments.reserve(data.blocks().size()); 55 | for (const absl::string_view fragment : data.blocks()) { 56 | fragments.push_back( 57 | highwayhash::StringView{fragment.data(), fragment.size()}); 58 | } 59 | highwayhash::HHResult64 result; 60 | highwayhash::InstructionSets::Run( 61 | kHashKey, fragments.data(), fragments.size(), &result); 62 | return result; 63 | } 64 | 65 | } // namespace riegeli::chunk_encoding_internal 66 | -------------------------------------------------------------------------------- /riegeli/chunk_encoding/chunk.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "riegeli/chunk_encoding/chunk.h" 16 | 17 | #include 18 | 19 | #include "absl/base/optimization.h" 20 | #include "absl/strings/string_view.h" 21 | #include "riegeli/base/assert.h" 22 | #include "riegeli/base/chain.h" 23 | #include "riegeli/bytes/reader.h" 24 | #include "riegeli/bytes/writer.h" 25 | #include "riegeli/chunk_encoding/constants.h" 26 | #include "riegeli/chunk_encoding/hash.h" 27 | 28 | namespace riegeli { 29 | 30 | ChunkHeader::ChunkHeader(const Chain& data, ChunkType chunk_type, 31 | uint64_t num_records, uint64_t decoded_data_size) { 32 | RIEGELI_ASSERT_LE(num_records, kMaxNumRecords) 33 | << "Failed precondition of ChunkHeader::ChunkHeader(): " 34 | "number of records out of range"; 35 | set_data_size(data.size()); 36 | set_data_hash(chunk_encoding_internal::Hash(data)); 37 | set_chunk_type_and_num_records(chunk_type, num_records); 38 | set_decoded_data_size(decoded_data_size); 39 | set_header_hash(computed_header_hash()); 40 | } 41 | 42 | uint64_t ChunkHeader::computed_header_hash() const { 43 | return chunk_encoding_internal::Hash( 44 | absl::string_view(bytes() + sizeof(uint64_t), size() - sizeof(uint64_t))); 45 | } 46 | 47 | bool Chunk::WriteTo(Writer& dest) const { 48 | if (ABSL_PREDICT_FALSE( 49 | !dest.Write(absl::string_view(header.bytes(), header.size())))) { 50 | return false; 51 | } 52 | return dest.Write(data); 53 | } 54 | 55 | bool Chunk::ReadFrom(Reader& src) { 56 | if (ABSL_PREDICT_FALSE(!src.Read(header.size(), header.bytes()))) { 57 | data.Clear(); 58 | return false; 
59 | } 60 | return src.Read(header.data_size(), data); 61 | } 62 | 63 | } // namespace riegeli 64 | -------------------------------------------------------------------------------- /riegeli/base/types.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef RIEGELI_BASE_TYPES_H_ 16 | #define RIEGELI_BASE_TYPES_H_ 17 | 18 | #include 19 | #include 20 | 21 | #include 22 | #include 23 | 24 | #include "absl/base/nullability.h" 25 | 26 | ABSL_POINTERS_DEFAULT_NONNULL 27 | 28 | namespace riegeli { 29 | 30 | // Position in a stream of bytes, used also for stream sizes. 31 | // 32 | // This is an unsigned integer type at least as wide as `size_t`, 33 | // `std::streamoff`, and `uint64_t`. 34 | using Position = 35 | std::common_type_t, uint64_t>; 36 | 37 | // Specifies the scope of objects to flush and the intended data durability 38 | // (without a guarantee). 39 | enum class FlushType { 40 | // Makes data written so far visible in other objects, propagating flushing 41 | // through owned dependencies of the given writer. 42 | kFromObject = 0, 43 | // Makes data written so far visible outside the process, propagating flushing 44 | // through dependencies of the given writer. This is generally the default. 
45 | kFromProcess = 1, 46 | // Makes data written so far visible outside the process and durable in case 47 | // of operating system crash, propagating flushing through dependencies of the 48 | // given writer. 49 | kFromMachine = 2, 50 | }; 51 | 52 | // Specifies the scope of objects to synchronize. 53 | enum class SyncType { 54 | // Propagates synchronization through owned dependencies of the given reader. 55 | kFromObject = 0, 56 | // Propagates synchronization through all dependencies of the given reader. 57 | // This is generally the default. 58 | kFromProcess = 1, 59 | }; 60 | 61 | } // namespace riegeli 62 | 63 | #endif // RIEGELI_BASE_TYPES_H_ 64 | -------------------------------------------------------------------------------- /riegeli/containers/BUILD: -------------------------------------------------------------------------------- 1 | load("@rules_cc//cc:defs.bzl", "cc_library") 2 | 3 | package( 4 | default_visibility = ["//visibility:public"], 5 | features = ["header_modules"], 6 | ) 7 | 8 | licenses(["notice"]) 9 | 10 | cc_library( 11 | name = "linear_sorted_string_set", 12 | srcs = ["linear_sorted_string_set.cc"], 13 | hdrs = ["linear_sorted_string_set.h"], 14 | deps = [ 15 | "//riegeli/base:arithmetic", 16 | "//riegeli/base:assert", 17 | "//riegeli/base:compact_string", 18 | "//riegeli/base:compare", 19 | "//riegeli/base:debug", 20 | "//riegeli/base:dependency", 21 | "//riegeli/base:iterable", 22 | "//riegeli/base:stream_utils", 23 | "//riegeli/bytes:compact_string_writer", 24 | "//riegeli/bytes:reader", 25 | "//riegeli/bytes:writer", 26 | "//riegeli/endian:endian_reading", 27 | "//riegeli/varint:varint_reading", 28 | "//riegeli/varint:varint_writing", 29 | "@com_google_absl//absl/base:core_headers", 30 | "@com_google_absl//absl/numeric:bits", 31 | "@com_google_absl//absl/status", 32 | "@com_google_absl//absl/status:statusor", 33 | "@com_google_absl//absl/strings", 34 | "@com_google_absl//absl/strings:string_view", 35 | ], 36 | ) 37 | 38 | 
cc_library( 39 | name = "chunked_sorted_string_set", 40 | srcs = ["chunked_sorted_string_set.cc"], 41 | hdrs = ["chunked_sorted_string_set.h"], 42 | deps = [ 43 | ":linear_sorted_string_set", 44 | "//riegeli/base:arithmetic", 45 | "//riegeli/base:assert", 46 | "//riegeli/base:binary_search", 47 | "//riegeli/base:compact_string", 48 | "//riegeli/base:compare", 49 | "//riegeli/base:debug", 50 | "//riegeli/base:dependency", 51 | "//riegeli/base:iterable", 52 | "//riegeli/base:memory_estimator", 53 | "//riegeli/bytes:reader", 54 | "//riegeli/bytes:writer", 55 | "//riegeli/varint:varint_reading", 56 | "//riegeli/varint:varint_writing", 57 | "@com_google_absl//absl/base:core_headers", 58 | "@com_google_absl//absl/container:inlined_vector", 59 | "@com_google_absl//absl/status", 60 | "@com_google_absl//absl/status:statusor", 61 | "@com_google_absl//absl/strings", 62 | "@com_google_absl//absl/strings:string_view", 63 | ], 64 | ) 65 | -------------------------------------------------------------------------------- /riegeli/base/memory_estimator.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #include "riegeli/base/memory_estimator.h" 16 | 17 | #ifdef __GXX_RTTI 18 | #include // IWYU pragma: keep 19 | #endif 20 | #include 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | #include "absl/base/nullability.h" 30 | #include "absl/container/flat_hash_set.h" 31 | 32 | ABSL_POINTERS_DEFAULT_NONNULL 33 | 34 | namespace riegeli { 35 | 36 | bool MemoryEstimatorDefault::RegisterNodeImpl(const void* absl_nullable ptr) { 37 | return ptr != nullptr && objects_seen_.insert(ptr).second; 38 | } 39 | 40 | void MemoryEstimatorReportingUnknownTypes::RegisterUnknownTypeImpl() { 41 | unknown_types_no_rtti_ = true; 42 | } 43 | 44 | void MemoryEstimatorReportingUnknownTypes::RegisterUnknownTypeImpl( 45 | std::type_index index) { 46 | unknown_types_.insert(index); 47 | } 48 | 49 | std::vector MemoryEstimatorReportingUnknownTypes::UnknownTypes() 50 | const { 51 | std::vector result; 52 | result.reserve((unknown_types_no_rtti_ ? 1 : 0) + unknown_types_.size()); 53 | if (unknown_types_no_rtti_) result.emplace_back(""); 54 | for (const std::type_index index : unknown_types_) { 55 | #ifdef __GXX_RTTI 56 | int status = 0; 57 | char* const absl_nullable demangled = 58 | abi::__cxa_demangle(index.name(), nullptr, nullptr, &status); 59 | if (status == 0 && demangled != nullptr) { 60 | result.emplace_back(demangled); 61 | std::free(demangled); 62 | continue; 63 | } 64 | #endif 65 | result.emplace_back(index.name()); 66 | } 67 | std::sort(result.begin(), result.end()); 68 | return result; 69 | } 70 | 71 | } // namespace riegeli 72 | -------------------------------------------------------------------------------- /python/build_pip_package.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2018 Google LLC 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # Builds a pip package for riegeli. 17 | # 18 | # Usage (where DEST is where to write the output, e.g. ~/riegeli-dist): 19 | # $ bazel build -c opt python:build_pip_package 20 | # $ bazel-bin/python/build_pip_package --dest DEST --sdist --bdist 21 | 22 | set -e 23 | 24 | function is_absolute { 25 | [[ "$1" = /* ]] || [[ "$1" =~ ^[a-zA-Z]:[/\\].* ]] 26 | } 27 | 28 | function real_path() { 29 | if is_absolute "$1"; then 30 | printf "%s" "$1" 31 | else 32 | printf "%s/%s" "$PWD" "${1#./}" 33 | fi 34 | } 35 | 36 | function build_sdist() { 37 | local dest=$1 38 | python python/setup.py sdist --dist-dir "$dest" 39 | } 40 | 41 | function build_bdist() { 42 | local dest=$1 43 | cd bazel-bin/python/build_pip_package.runfiles/com_google_riegeli/python 44 | python setup.py bdist_wheel --dist-dir "$dest" 45 | cd - 46 | } 47 | 48 | function main() { 49 | local dest= 50 | local sdist=false 51 | local bdist=false 52 | while [[ $# -gt 0 ]]; do 53 | if [[ $1 == --dest ]]; then 54 | shift 55 | dest=$(real_path "$1") 56 | elif [[ $1 == --sdist ]]; then 57 | sdist=true 58 | elif [[ $1 == --bdist ]]; then 59 | bdist=true 60 | else 61 | printf "Unknown flag: %s\n" "$1" >&2 62 | exit 1 63 | fi 64 | shift 65 | done 66 | if [[ -z $dest ]]; then 67 | printf "Missing required flag: --dest DIRECTORY\n" >&2 68 | exit 1 69 | fi 70 | if [[ $sdist != true ]] && [[ $bdist != true ]]; then 71 | printf "Nothing to do: missing --sdist or --bdist\n" >&2 72 | exit 1 73 | fi 74 | mkdir -p -- "$dest" 75 | if [[ $sdist = true ]]; then 76 | build_sdist
"$dest" 77 | fi 78 | if [[ $bdist = true ]]; then 79 | build_bdist "$dest" 80 | fi 81 | } 82 | 83 | main "$@" 84 | -------------------------------------------------------------------------------- /python/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """PIP package setup for Riegeli.""" 15 | 16 | import setuptools 17 | from setuptools import dist 18 | 19 | with open('README.md', 'r') as fh: 20 | long_description = fh.read() 21 | 22 | 23 | class BinaryDistribution(dist.Distribution): 24 | """This class is needed in order to create OS specific wheels.""" 25 | 26 | def has_ext_modules(self): 27 | return True 28 | 29 | 30 | setuptools.setup( 31 | name='riegeli', 32 | version='0.0.1', 33 | description='File format for storing a sequence of records', 34 | long_description=long_description, 35 | long_description_content_type='text/markdown', 36 | url='https://github.com/google/riegeli', 37 | author='Google LLC', 38 | author_email='compression-dev@google.com', 39 | license='Apache License, Version 2.0', 40 | python_requires='>=3.5,<4', 41 | install_requires=[ 42 | 'protobuf>=3.8.0,<4', 43 | ], 44 | extras_require={ 45 | 'tensorflow': ['tensorflow>=1.15,<3'], 46 | }, 47 | packages=setuptools.find_packages(), 48 | include_package_data=True, 49 | package_data={'': ['**/*.so']}, 50 | distclass=BinaryDistribution, 
51 | classifiers=[ 52 | 'Programming Language :: Python', 53 | 'Intended Audience :: Developers', 54 | 'Programming Language :: Python :: 3', 55 | 'Programming Language :: Python :: 3.5', 56 | 'Programming Language :: Python :: 3.6', 57 | 'Programming Language :: Python :: 3.7', 58 | 'Programming Language :: Python :: 3.8', 59 | 'License :: OSI Approved :: Apache Software License', 60 | 'Operating System :: OS Independent', 61 | 'Topic :: Software Development :: Libraries', 62 | 'Topic :: Software Development :: Libraries :: Python Modules', 63 | ], 64 | ) 65 | -------------------------------------------------------------------------------- /riegeli/tensorflow/ops/riegeli_dataset_ops.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #include "tensorflow/core/framework/common_shape_fns.h" 16 | #include "tensorflow/core/framework/op.h" 17 | #include "tensorflow/core/framework/shape_inference.h" 18 | 19 | namespace riegeli::tensorflow { 20 | 21 | REGISTER_OP("RiegeliDataset") 22 | .Input("filenames: string") 23 | .Input("min_buffer_size: int64") 24 | .Input("max_buffer_size: int64") 25 | .Output("handle: variant") 26 | .SetIsStateful() 27 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { 28 | ::tensorflow::shape_inference::ShapeHandle unused; 29 | // `filenames` must be a scalar or a vector. 30 | TF_RETURN_IF_ERROR(c->WithRankAtMost(c->input(0), 1, &unused)); 31 | // `min_buffer_size` must be a scalar. 32 | TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); 33 | // `max_buffer_size` must be a scalar. 34 | TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); 35 | return ::tensorflow::shape_inference::ScalarShape(c); 36 | }) 37 | .Doc(R"doc( 38 | Creates a dataset that emits the records from one or more Riegeli/records files. 39 | 40 | filenames: A scalar or vector containing the name(s) of the file(s) to be 41 | read. 42 | min_buffer_size: Tunes the minimal buffer size, which determines how much data 43 | at a time is typically read from the file. The actual buffer size changes 44 | between min_buffer_size and max_buffer_size depending on the access pattern. 45 | max_buffer_size: Tunes the maximal buffer size, which determines how much data 46 | at a time is typically read from the file. The actual buffer size changes 47 | between min_buffer_size and max_buffer_size depending on the access pattern.
48 | )doc"); 49 | 50 | } // namespace riegeli::tensorflow 51 | -------------------------------------------------------------------------------- /riegeli/bytes/fd_internal_for_cc.h: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef RIEGELI_BYTES_FD_INTERNAL_FOR_CC_H_ 16 | #define RIEGELI_BYTES_FD_INTERNAL_FOR_CC_H_ 17 | 18 | // Warning: Do not include this header in other headers, because the definition 19 | // of `off_t` depends on `_FILE_OFFSET_BITS` which can reliably be set only 20 | // in a standalone compilation unit. 
namespace riegeli::fd_internal {

// Platform-selected aliases and thin wrappers for seeking in and stat'ing a
// file descriptor. Each wrapper comes with a string constant naming the
// function it forwards to (presumably for use in error messages -- confirm
// against the callers).

#ifndef _WIN32

// File offset type accepted and returned by `LSeek()`.
using Offset = off_t;

// Forwards to `lseek()`.
inline Offset LSeek(int fd, Offset offset, int whence) {
  return lseek(fd, offset, whence);
}

// Name of the function called by `LSeek()`.
inline constexpr absl::string_view kLSeekFunctionName = "lseek()";

// Structure filled by `FStat()`.
using StatInfo = struct stat;

// Forwards to `fstat()`.
inline int FStat(int fd, StatInfo* stat_info) { return fstat(fd, stat_info); }

// Name of the function called by `FStat()`.
inline constexpr absl::string_view kFStatFunctionName = "fstat()";

#else  // _WIN32

// File offset type accepted and returned by `LSeek()`.
using Offset = __int64;

// Forwards to `_lseeki64()`.
inline Offset LSeek(int fd, Offset offset, int whence) {
  return _lseeki64(fd, offset, whence);
}

// Name of the function called by `LSeek()`.
inline constexpr absl::string_view kLSeekFunctionName = "_lseeki64()";

// `struct __stat64` in a namespace does not work in MSVC due to a bug regarding
// https://en.cppreference.com/w/cpp/language/elaborated_type_specifier.
using StatInfo = struct ::__stat64;

// Forwards to `_fstat64()`.
inline int FStat(int fd, StatInfo* stat_info) {
  return _fstat64(fd, stat_info);
}

// Name of the function called by `FStat()`.
inline constexpr absl::string_view kFStatFunctionName = "_fstat64()";

#endif  // _WIN32

}  // namespace riegeli::fd_internal
load("@rules_cc//cc:defs.bzl", "cc_library")

# All targets in this package are public by default and are built with the
# `header_modules` feature.
package(
    default_visibility = ["//visibility:public"],
    features = ["header_modules"],
)

licenses(["notice"])

# Header-only target exposing `concat.h`.
cc_library(
    name = "concat",
    hdrs = ["concat.h"],
    deps = [
        "//riegeli/base:initializer",
        "//riegeli/bytes:ostream_writer",
        "//riegeli/bytes:stringify",
        "//riegeli/bytes:stringify_writer",
        "//riegeli/bytes:writer",
        "@com_google_absl//absl/base:core_headers",
    ],
)

# `write_int.h` together with its implementation in `write_int.cc`.
cc_library(
    name = "write_int",
    srcs = ["write_int.cc"],
    hdrs = ["write_int.h"],
    deps = [
        "//riegeli/base:arithmetic",
        "//riegeli/base:assert",
        "//riegeli/base:types",
        "//riegeli/bytes:ostream_writer",
        "//riegeli/bytes:write_int_internal",
        "//riegeli/bytes:writer",
        "//riegeli/endian:endian_writing",
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/numeric:bits",
        "@com_google_absl//absl/numeric:int128",
        "@com_google_absl//absl/strings:string_view",
    ],
)

# Header-only target exposing `ascii_align.h`; depends on `:concat`.
cc_library(
    name = "ascii_align",
    hdrs = ["ascii_align.h"],
    deps = [
        ":concat",
        "//riegeli/base:arithmetic",
        "//riegeli/base:assert",
        "//riegeli/base:byte_fill",
        "//riegeli/base:chain",
        "//riegeli/base:initializer",
        "//riegeli/base:type_traits",
        "//riegeli/base:types",
        "//riegeli/bytes:ostream_writer",
        "//riegeli/bytes:restricted_chain_writer",
        "//riegeli/bytes:stringify",
        "//riegeli/bytes:stringify_writer",
        "//riegeli/bytes:writer",
        "@com_google_absl//absl/base:core_headers",
    ],
)

# Header-only target exposing `join.h`.
cc_library(
    name = "join",
    hdrs = ["join.h"],
    deps = [
        "//riegeli/base:initializer",
        "//riegeli/base:iterable",
        "//riegeli/base:types",
        "//riegeli/bytes:ostream_writer",
        "//riegeli/bytes:stringify",
        "//riegeli/bytes:stringify_writer",
        "//riegeli/bytes:writer",
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/strings:string_view",
    ],
)
namespace riegeli::chunk_encoding_internal {

// NOTE(review): template arguments (after `std::unique_ptr`,
// `std::make_unique` and `static_cast`) appear to be missing in this copy of
// the file -- confirm against the canonical source.

// Creates a writer which Brotli-compresses into `compressed`, choosing the
// encoder according to `compressor_options.brotli_encoder()`:
//  * `kRBrotliOrCBrotli` and `kCBrotli` delegate to `NewCBrotliWriter()`.
//  * `kRBrotli` returns a writer which has already been failed with
//    `absl::UnimplementedError("Rust Brotli not available")`.
//
// `recycling_pool_options` is not used by this definition. The definition is
// marked `ABSL_ATTRIBUTE_WEAK`, presumably so that another definition can
// replace it at link time -- confirm against the build configuration.
ABSL_ATTRIBUTE_WEAK std::unique_ptr NewBrotliWriter(
    Chain* compressed, const CompressorOptions& compressor_options,
    ABSL_ATTRIBUTE_UNUSED const RecyclingPoolOptions& recycling_pool_options) {
  switch (compressor_options.brotli_encoder()) {
    case BrotliEncoder::kRBrotliOrCBrotli:
    case BrotliEncoder::kCBrotli:
      return NewCBrotliWriter(compressed, compressor_options);
    case BrotliEncoder::kRBrotli: {
      // No Rust encoder here: hand back a writer carrying the failure status
      // instead of returning null.
      std::unique_ptr writer = std::make_unique();
      writer->Fail(absl::UnimplementedError("Rust Brotli not available"));
      return writer;
    }
  }
  // Reached only if `brotli_encoder()` holds a value outside the enumerators
  // handled above.
  RIEGELI_ASSUME_UNREACHABLE()
      << "Unknown Brotli encoder: "
      << static_cast(compressor_options.brotli_encoder());
}

// Creates a Brotli writer targeting `compressed`, configured with the
// compression level and the Brotli window log taken from
// `compressor_options`.
std::unique_ptr NewCBrotliWriter(
    Chain* compressed, const CompressorOptions& compressor_options) {
  return std::make_unique>>(
      riegeli::Maker(compressed),
      BrotliWriterBase::Options()
          .set_compression_level(compressor_options.compression_level())
          .set_window_log(compressor_options.brotli_window_log()));
}

}  // namespace riegeli::chunk_encoding_internal
// Adapts a `Writer` to a `snappy::Sink`.
//
// The sink keeps only the `Writer*` passed to the constructor; the class
// declares no destructor, so it does not delete the `Writer`. Copying is
// disabled.
class WriterSnappySink : public snappy::Sink {
 public:
  // `dest` is passed through `RIEGELI_EVAL_ASSERT_NOTNULL`, i.e. it is
  // expected to be non-null.
  explicit WriterSnappySink(Writer* dest)
      : dest_(RIEGELI_EVAL_ASSERT_NOTNULL(dest)) {}

  WriterSnappySink(const WriterSnappySink&) = delete;
  WriterSnappySink& operator=(const WriterSnappySink&) = delete;

  // Overrides of the `snappy::Sink` interface. They are only declared here;
  // the definitions are out of line.
  void Append(const char* src, size_t length) override;
  char* GetAppendBuffer(size_t length, char* scratch) override;
  void AppendAndTakeOwnership(char* src, size_t length,
                              void (*deleter)(void*, const char*, size_t),
                              void* deleter_arg) override;
  char* GetAppendBufferVariable(size_t min_length, size_t recommended_length,
                                char* scratch, size_t scratch_length,
                                size_t* result_length) override;

 private:
  // The adapted `Writer`, set once by the constructor.
  Writer* dest_;
};
// A `snappy::Source` implemented on top of a `Reader*` and a `Position` size,
// both supplied to the constructor. The class declares no destructor, so it
// does not delete the `Reader`. Copying is disabled.
class ReaderSnappySource : public snappy::Source {
 public:
  // `src` is passed through `RIEGELI_EVAL_ASSERT_NOTNULL`, i.e. it is expected
  // to be non-null. `size` is stored as is; presumably it is the number of
  // bytes of `src` exposed through this source -- confirm against the
  // out-of-line definitions.
  explicit ReaderSnappySource(Reader* src, Position size)
      : src_(RIEGELI_EVAL_ASSERT_NOTNULL(src)), size_(size) {}

  ReaderSnappySource(const ReaderSnappySource&) = delete;
  ReaderSnappySource& operator=(const ReaderSnappySource&) = delete;

  // Overrides of the `snappy::Source` interface. They are only declared here;
  // the definitions are out of line.
  size_t Available() const override;
  const char* Peek(size_t* length) override;
  void Skip(size_t length) override;

 private:
  // The adapted `Reader`, set once by the constructor.
  Reader* src_;
  // The size passed to the constructor.
  Position size_;
};
namespace riegeli::internal {

// NOTE(review): the template argument of `absl::AnyInvocable` appears to be
// missing in this copy of the file -- confirm against the canonical source.

// Marks the pool as exiting and blocks until every worker thread has
// unregistered itself, i.e. until `num_threads_` drops to 0.
ThreadPool::~ThreadPool() {
  absl::MutexLock lock(mutex_);
  exiting_ = true;
  // Workers decrement `num_threads_` under `mutex_` right before they return.
  mutex_.Await(absl::Condition(
      +[](size_t* num_threads) { return *num_threads == 0; }, &num_threads_));
}

// Enqueues `task`. A new detached worker thread is started unless the number
// of idle workers already covers the number of queued tasks.
//
// Precondition: the pool is not exiting.
void ThreadPool::Schedule(absl::AnyInvocable task) {
  {
    absl::MutexLock lock(mutex_);
    RIEGELI_ASSERT(!exiting_)
        << "Failed precondition of ThreadPool::Schedule(): no new threads may "
           "be scheduled while the thread pool is exiting";
    tasks_.push_back(std::move(task));
    // Enough idle workers for all queued tasks: no new thread is needed.
    if (num_idle_threads_ >= tasks_.size()) return;
    // Account for the new worker before it starts, while the lock is held.
    ++num_threads_;
  }
  std::thread([this] {
    for (;;) {
      absl::ReleasableMutexLock lock(mutex_);
      // The worker counts as idle only while it waits for work.
      ++num_idle_threads_;
      // Wait until there is a task or the pool is exiting, for at most one
      // second.
      mutex_.AwaitWithTimeout(
          absl::Condition(
              +[](ThreadPool* self)
                   ABSL_EXCLUSIVE_LOCKS_REQUIRED(self->mutex_) {
                     return !self->tasks_.empty() || self->exiting_;
                   },
              this),
          absl::Seconds(1));
      --num_idle_threads_;
      // The worker terminates if the wait ended without a task (timeout) or if
      // the pool is exiting; in the latter case tasks still queued are not run
      // by this worker.
      if (tasks_.empty() || exiting_) {
        --num_threads_;
        return;
      }
      absl::AnyInvocable task = std::move(tasks_.front());
      tasks_.pop_front();
      // Run the task without holding the lock.
      lock.Release();
      std::move(task)();
    }
  }).detach();
}

// Returns the `ThreadPool` obtained from `Global()` with a factory which
// default-constructs a pool (presumably a single lazily created process-wide
// instance -- confirm against `riegeli/base/global.h`).
ThreadPool& ThreadPool::global() {
  return Global([] { return ThreadPool(); });
}

}  // namespace riegeli::internal
| // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "riegeli/base/cord_iterator_span.h" 16 | 17 | #include 18 | 19 | #include 20 | #include 21 | 22 | #include "absl/base/nullability.h" 23 | #include "absl/base/optimization.h" 24 | #include "absl/strings/cord.h" 25 | #include "absl/strings/string_view.h" 26 | #include "riegeli/base/assert.h" 27 | #include "riegeli/base/string_utils.h" 28 | 29 | ABSL_POINTERS_DEFAULT_NONNULL 30 | 31 | namespace riegeli { 32 | 33 | void CordIteratorSpan::ReadSlow(absl::Cord::CharIterator& src, size_t length, 34 | char* dest) { 35 | absl::string_view chunk = absl::Cord::ChunkRemaining(src); 36 | RIEGELI_ASSERT_LT(chunk.size(), length) 37 | << "Failed precondition of CordIteratorSpan::ReadSlow(): " 38 | "enough data available, use Read() instead"; 39 | do { 40 | std::memcpy(dest, chunk.data(), chunk.size()); 41 | absl::Cord::Advance(&src, chunk.size()); 42 | dest += chunk.size(); 43 | length -= chunk.size(); 44 | chunk = absl::Cord::ChunkRemaining(src); 45 | } while (chunk.size() < length); 46 | std::memcpy(dest, chunk.data(), length); 47 | absl::Cord::Advance(&src, length); 48 | } 49 | 50 | absl::string_view CordIteratorSpan::ToStringView(std::string& scratch) && { 51 | absl::Cord::CharIterator& iter = *iterator_; 52 | size_t length = length_; 53 | if (length == 0) return absl::string_view(); 54 | absl::string_view chunk = absl::Cord::ChunkRemaining(iter); 55 | if 
(ABSL_PREDICT_TRUE(chunk.size() >= length)) { 56 | absl::Cord::Advance(&iter, length); 57 | return chunk.substr(0, length); 58 | } 59 | scratch.clear(); 60 | ResizeStringAmortized(scratch, length); 61 | ReadSlow(iter, length, scratch.data()); 62 | return scratch; 63 | } 64 | 65 | void CordIteratorSpan::ToString(std::string& dest) && { 66 | absl::Cord::CharIterator& iter = *iterator_; 67 | size_t length = length_; 68 | dest.clear(); 69 | dest.resize(length); 70 | Read(iter, length, dest.data()); 71 | } 72 | 73 | } // namespace riegeli 74 | -------------------------------------------------------------------------------- /riegeli/snappy/BUILD: -------------------------------------------------------------------------------- 1 | load("@rules_cc//cc:defs.bzl", "cc_library") 2 | 3 | package( 4 | default_visibility = ["//visibility:public"], 5 | features = ["header_modules"], 6 | ) 7 | 8 | licenses(["notice"]) 9 | 10 | cc_library( 11 | name = "snappy_reader", 12 | srcs = ["snappy_reader.cc"], 13 | hdrs = ["snappy_reader.h"], 14 | deps = [ 15 | ":snappy_streams", 16 | "//riegeli/base:assert", 17 | "//riegeli/base:chain", 18 | "//riegeli/base:dependency", 19 | "//riegeli/base:initializer", 20 | "//riegeli/base:object", 21 | "//riegeli/base:status", 22 | "//riegeli/base:types", 23 | "//riegeli/bytes:chain_reader", 24 | "//riegeli/bytes:chain_writer", 25 | "//riegeli/bytes:reader", 26 | "//riegeli/bytes:writer", 27 | "//riegeli/varint:varint_reading", 28 | "@com_google_absl//absl/base:core_headers", 29 | "@com_google_absl//absl/status", 30 | "@com_google_absl//absl/strings", 31 | "@snappy", 32 | ], 33 | ) 34 | 35 | cc_library( 36 | name = "snappy_writer", 37 | srcs = ["snappy_writer.cc"], 38 | hdrs = ["snappy_writer.h"], 39 | deps = [ 40 | ":snappy_streams", 41 | "//riegeli/base:arithmetic", 42 | "//riegeli/base:assert", 43 | "//riegeli/base:buffering", 44 | "//riegeli/base:byte_fill", 45 | "//riegeli/base:chain", 46 | "//riegeli/base:dependency", 47 | 
"//riegeli/base:external_ref", 48 | "//riegeli/base:initializer", 49 | "//riegeli/base:null_safe_memcpy", 50 | "//riegeli/base:object", 51 | "//riegeli/base:status", 52 | "//riegeli/base:types", 53 | "//riegeli/bytes:chain_reader", 54 | "//riegeli/bytes:reader", 55 | "//riegeli/bytes:writer", 56 | "@com_google_absl//absl/base:core_headers", 57 | "@com_google_absl//absl/status", 58 | "@com_google_absl//absl/strings", 59 | "@com_google_absl//absl/strings:cord", 60 | "@com_google_absl//absl/types:span", 61 | "@snappy", 62 | ], 63 | ) 64 | 65 | cc_library( 66 | name = "snappy_streams", 67 | srcs = ["snappy_streams.cc"], 68 | hdrs = ["snappy_streams.h"], 69 | visibility = ["//visibility:private"], 70 | deps = [ 71 | "//riegeli/base:arithmetic", 72 | "//riegeli/base:assert", 73 | "//riegeli/base:external_ref", 74 | "//riegeli/base:types", 75 | "//riegeli/bytes:reader", 76 | "//riegeli/bytes:writer", 77 | "@com_google_absl//absl/base:core_headers", 78 | "@com_google_absl//absl/strings:string_view", 79 | "@snappy", 80 | ], 81 | ) 82 | -------------------------------------------------------------------------------- /riegeli/zlib/zlib_error.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #include "riegeli/zlib/zlib_error.h" 16 | 17 | #include 18 | 19 | #include "absl/status/status.h" 20 | #include "absl/strings/str_cat.h" 21 | #include "absl/strings/string_view.h" 22 | #include "riegeli/base/assert.h" 23 | #include "zlib.h" 24 | 25 | namespace riegeli::zlib_internal { 26 | 27 | absl::Status ZlibErrorToStatus(absl::string_view operation, int zlib_code, 28 | const char* details) { 29 | absl::StatusCode code; 30 | switch (zlib_code) { 31 | case Z_OK: 32 | return absl::OkStatus(); 33 | case Z_NEED_DICT: 34 | case Z_DATA_ERROR: 35 | code = absl::StatusCode::kInvalidArgument; 36 | break; 37 | case Z_MEM_ERROR: 38 | code = absl::StatusCode::kResourceExhausted; 39 | break; 40 | default: 41 | // Should not happen. 42 | code = absl::StatusCode::kInternal; 43 | break; 44 | } 45 | std::string message = absl::StrCat(operation, " failed"); 46 | if (details == nullptr) { 47 | switch (zlib_code) { 48 | case Z_OK: 49 | RIEGELI_ASSUME_UNREACHABLE() << "Handled before switch"; 50 | case Z_STREAM_END: 51 | details = "stream end"; 52 | break; 53 | case Z_NEED_DICT: 54 | details = "need dictionary"; 55 | break; 56 | case Z_ERRNO: 57 | details = "file error"; 58 | break; 59 | case Z_STREAM_ERROR: 60 | details = "stream error"; 61 | break; 62 | case Z_DATA_ERROR: 63 | details = "data error"; 64 | break; 65 | case Z_MEM_ERROR: 66 | details = "insufficient memory"; 67 | break; 68 | case Z_BUF_ERROR: 69 | details = "buffer error"; 70 | break; 71 | case Z_VERSION_ERROR: 72 | details = "incompatible version"; 73 | break; 74 | default: 75 | absl::StrAppend(&message, ": unknown zlib error code: ", zlib_code); 76 | break; 77 | } 78 | } 79 | if (details != nullptr) absl::StrAppend(&message, ": ", details); 80 | return absl::Status(code, message); 81 | } 82 | 83 | } // namespace riegeli::zlib_internal 84 | -------------------------------------------------------------------------------- /riegeli/base/type_id.h: 
-------------------------------------------------------------------------------- 1 | // Copyright 2017 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef RIEGELI_BASE_TYPE_ID_H_ 16 | #define RIEGELI_BASE_TYPE_ID_H_ 17 | 18 | #include 19 | #include 20 | #include 21 | 22 | #include "absl/base/nullability.h" 23 | #include "riegeli/base/compare.h" 24 | 25 | ABSL_POINTERS_DEFAULT_NONNULL 26 | 27 | namespace riegeli { 28 | 29 | // `TypeId::For<A>()` is a token which is equal to `TypeId::For<B>()` whenever 30 | // `A` and `B` are the same type. 31 | // 32 | // `TypeId()` is another value not equal to any other. 
// NOTE(review): template parameter lists and template arguments (after
// `template`, `WithCompare` and `TypeIdToken`) appear to be missing in this
// copy of the file -- confirm against the canonical source.
//
// A value type wrapping a single `const void*`. Two `TypeId`s are equal when
// their pointers are equal; a default-constructed `TypeId` holds `nullptr`.
class ABSL_NULLABILITY_COMPATIBLE TypeId : public WithCompare {
 public:
  // Both constructors leave `ptr_` at its default, `nullptr`.
  constexpr TypeId() = default;
  /*implicit*/ constexpr TypeId(std::nullptr_t) noexcept {}

  TypeId(const TypeId& that) = default;
  TypeId& operator=(const TypeId& that) = default;

  // Returns the `TypeId` wrapping the address of `TypeIdToken::token` (see the
  // definition below the class).
  template
  static constexpr TypeId For();

  // Equality compares the wrapped pointers.
  friend constexpr bool operator==(TypeId a, TypeId b) {
    return a.ptr_ == b.ptr_;
  }
  // Ordering compares the wrapped pointers with `std::less<>` and
  // `std::greater<>` rather than with the built-in `<` and `>`, which do not
  // guarantee an order between pointers to unrelated objects.
  friend StrongOrdering RIEGELI_COMPARE(TypeId a, TypeId b) {
    if (std::less<>()(a.ptr_, b.ptr_)) return StrongOrdering::less;
    if (std::greater<>()(a.ptr_, b.ptr_)) return StrongOrdering::greater;
    return StrongOrdering::equal;
  }

  // Hashes the wrapped pointer.
  template
  friend HashState AbslHashValue(HashState hash_state, TypeId self) {
    return HashState::combine(std::move(hash_state), self.ptr_);
  }

 private:
  using pointer = void*;  // For `ABSL_NULLABILITY_COMPATIBLE`.

  // Holder of the static `token` whose address is wrapped by `For()`.
  template
  struct TypeIdToken;

  explicit constexpr TypeId(const void* ptr) : ptr_(ptr) {}

  // `nullptr` for a default-constructed `TypeId`, otherwise the address passed
  // to the private constructor.
  const void* absl_nullable ptr_ = nullptr;
};

// Implementation details follow.

// Declares the static member whose address is used by `TypeId::For()`.
template
struct TypeId::TypeIdToken {
  static const char token;
};

// Only the address of `token` is used by `For()`; its value is `'\0'`.
template
const char TypeId::TypeIdToken::token = '\0';

template
constexpr TypeId TypeId::For() {
  return TypeId(&TypeIdToken::token);
}
"//riegeli/base:object", 58 | "//riegeli/base:recycling_pool", 59 | "//riegeli/base:shared_ptr", 60 | "//riegeli/base:status", 61 | "//riegeli/base:types", 62 | "//riegeli/bytes:buffer_options", 63 | "//riegeli/bytes:buffered_writer", 64 | "//riegeli/bytes:reader", 65 | "//riegeli/bytes:writer", 66 | "@com_google_absl//absl/base:core_headers", 67 | "@com_google_absl//absl/status", 68 | "@com_google_absl//absl/strings", 69 | "@com_google_absl//absl/strings:string_view", 70 | "@zlib", 71 | ], 72 | ) 73 | 74 | cc_library( 75 | name = "zlib_error", 76 | srcs = ["zlib_error.cc"], 77 | hdrs = ["zlib_error.h"], 78 | visibility = ["//visibility:private"], 79 | deps = [ 80 | "//riegeli/base:assert", 81 | "@com_google_absl//absl/base:core_headers", 82 | "@com_google_absl//absl/status", 83 | "@com_google_absl//absl/strings", 84 | "@com_google_absl//absl/strings:string_view", 85 | "@zlib", 86 | ], 87 | ) 88 | -------------------------------------------------------------------------------- /riegeli/bytes/array_backward_writer.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #include "riegeli/bytes/array_backward_writer.h" 16 | 17 | #include 18 | 19 | #include "absl/base/optimization.h" 20 | #include "absl/strings/string_view.h" 21 | #include "absl/types/span.h" 22 | #include "riegeli/base/arithmetic.h" 23 | #include "riegeli/base/assert.h" 24 | #include "riegeli/base/types.h" 25 | 26 | namespace riegeli { 27 | // Reached with no space available: delegates to ForcePushUsingScratch(). 28 | bool ArrayBackwardWriterBase::PushBehindScratch(size_t recommended_length) { 29 | RIEGELI_ASSERT_EQ(available(), 0u) 30 | << "Failed precondition of PushableBackwardWriter::PushBehindScratch(): " 31 | "some space available, use Push() instead"; 32 | RIEGELI_ASSERT(!scratch_used()) 33 | << "Failed precondition of PushableBackwardWriter::PushBehindScratch(): " 34 | "scratch used"; 35 | return ForcePushUsingScratch(); 36 | } 37 | // Reached when src does not fit in the available space: always reports FailOverflow(). 38 | bool ArrayBackwardWriterBase::WriteBehindScratch(absl::string_view src) { 39 | RIEGELI_ASSERT_LT(available(), src.size()) 40 | << "Failed precondition of " 41 | "PushableBackwardWriter::WriteBehindScratch(string_view): " 42 | "enough space available, use Write(string_view) instead"; 43 | RIEGELI_ASSERT(!scratch_used()) 44 | << "Failed precondition of " 45 | "PushableBackwardWriter::WriteBehindScratch(string_view): " 46 | "scratch used"; 47 | return FailOverflow(); 48 | } 49 | // Unless the writer has failed, sets written_ to the span of start_to_cursor() bytes beginning at cursor(). 50 | bool ArrayBackwardWriterBase::FlushBehindScratch(FlushType flush_type) { 51 | RIEGELI_ASSERT(!scratch_used()) 52 | << "Failed precondition of PushableBackwardWriter::FlushBehindScratch(): " 53 | "scratch used"; 54 | if (ABSL_PREDICT_FALSE(!ok())) return false; 55 | written_ = absl::MakeSpan(cursor(), start_to_cursor()); 56 | return true; 57 | } 58 | // Fails if the writer has failed or new_size exceeds start_to_cursor(); otherwise sets the cursor to start() - new_size. 59 | bool ArrayBackwardWriterBase::TruncateBehindScratch(Position new_size) { 60 | RIEGELI_ASSERT(!scratch_used()) 61 | << "Failed precondition of " 62 | "PushableBackwardWriter::TruncateBehindScratch(): " 63 | "scratch used"; 64 | if (ABSL_PREDICT_FALSE(!ok())) return false; 65 | if (ABSL_PREDICT_FALSE(new_size > start_to_cursor())) return false; 66 |
set_cursor(start() - IntCast(new_size)); 67 | return true; 68 | } 69 | 70 | } // namespace riegeli 71 | -------------------------------------------------------------------------------- /riegeli/records/tools/tfrecord_recognizer.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "riegeli/records/tools/tfrecord_recognizer.h" 16 | 17 | #include 18 | 19 | #include "absl/base/optimization.h" 20 | #include "absl/status/status.h" 21 | #include "riegeli/base/any.h" 22 | #include "riegeli/base/maker.h" 23 | #include "riegeli/bytes/reader.h" 24 | #include "riegeli/endian/endian_reading.h" 25 | #include "riegeli/zlib/zlib_reader.h" 26 | #include "tensorflow/core/lib/hash/crc32c.h" 27 | #include "tensorflow/core/lib/io/record_reader.h" 28 | 29 | namespace riegeli { 30 | // Fills record_reader_options according to whether the input is recognized as zlib, then checks that the first 8 bytes are followed by their masked CRC32C. Returns false without failing on an empty input. 31 | bool TFRecordRecognizer::CheckFileFormat( 32 | tensorflow::io::RecordReaderOptions& record_reader_options) { 33 | if (ABSL_PREDICT_FALSE(!ok())) return false; 34 | if (ABSL_PREDICT_FALSE(!byte_reader_->Pull())) { 35 | if (ABSL_PREDICT_FALSE(!byte_reader_->ok())) { 36 | return Fail(byte_reader_->status()); 37 | } 38 | // Empty file: return `false` but leave `ok()` as `true`. This mimics the 39 | // behavior of reading functions at end of file.
40 | return false; 41 | } 42 | // Record the detected compression in the options and pick the reader used for the header check below. 43 | Any::Inlining> reader; 44 | if (RecognizeZlib(*byte_reader_)) { 45 | record_reader_options.compression_type = 46 | tensorflow::io::RecordReaderOptions::ZLIB_COMPRESSION; 47 | record_reader_options.zlib_options = 48 | tensorflow::io::ZlibCompressionOptions::DEFAULT(); 49 | record_reader_options.zlib_options.window_bits = 32; 50 | reader = riegeli::Maker>(byte_reader_); 51 | } else { 52 | record_reader_options.compression_type = 53 | tensorflow::io::RecordReaderOptions::NONE; 54 | reader = byte_reader_; 55 | } 56 | // Need 8 bytes followed by a 4-byte little-endian masked CRC32C of those 8 bytes. 57 | if (ABSL_PREDICT_FALSE(!reader->Pull(sizeof(uint64_t) + sizeof(uint32_t)))) { 58 | if (ABSL_PREDICT_FALSE(!reader->ok())) return Fail(reader->status()); 59 | return Fail(absl::InvalidArgumentError("Truncated TFRecord file")); 60 | } 61 | if (tensorflow::crc32c::Unmask( 62 | ReadLittleEndian(reader->cursor() + sizeof(uint64_t))) != 63 | tensorflow::crc32c::Value(reader->cursor(), sizeof(uint64_t))) { 64 | return Fail(absl::InvalidArgumentError("Corrupted TFRecord file")); 65 | } 66 | return true; 67 | } 68 | 69 | } // namespace riegeli 70 | -------------------------------------------------------------------------------- /riegeli/bytes/std_io.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License.
14 | 15 | #include "riegeli/bytes/std_io.h" 16 | 17 | #include 18 | 19 | #include "riegeli/base/assert.h" 20 | #include "riegeli/base/global.h" 21 | #include "riegeli/base/sized_shared_buffer.h" 22 | #include "riegeli/bytes/fd_reader.h" 23 | #include "riegeli/bytes/fd_writer.h" 24 | 25 | namespace riegeli { 26 | 27 | namespace { 28 | // File descriptors used by StdIn, StdOut and StdErr (initially 0, 1 and 2); the Injected*Fd classes replace them temporarily. 29 | int std_in_fd = 0; 30 | int std_out_fd = 1; 31 | int std_err_fd = 2; 32 | // Buffer obtained through Global(): holds data that StdIn::Done() saved because it was buffered but not consumed. 33 | SizedSharedBuffer& StdInPending() { 34 | return Global([] { return SizedSharedBuffer(); }); 35 | } 36 | 37 | } // namespace 38 | // Reads from std_in_fd; if a previous StdIn left pending data, it is restored into this reader's buffer. 39 | StdIn::StdIn(Options options) : FdReader(std_in_fd, std::move(options)) { 40 | SizedSharedBuffer& pending = StdInPending(); 41 | if (!pending.empty()) RestoreBuffer(std::move(pending)); 42 | } 43 | // Same steps as the constructor, applied to an existing object. 44 | void StdIn::Reset(Options options) { 45 | FdReader::Reset(std_in_fd, std::move(options)); 46 | SizedSharedBuffer& pending = StdInPending(); 47 | if (!pending.empty()) RestoreBuffer(std::move(pending)); 48 | } 49 | // Saves unread buffered data into StdInPending() when random access is not supported, then closes the reader. 50 | void StdIn::Done() { 51 | RIEGELI_ASSERT(StdInPending().empty()) 52 | << "Multiple instances of StdIn in use at a time"; 53 | if (available() > 0 && !SupportsRandomAccess()) StdInPending() = SaveBuffer(); 54 | FdReader::Done(); 55 | } 56 | // StdOut and StdErr write to the current values of std_out_fd and std_err_fd. 57 | StdOut::StdOut(Options options) : FdWriter(std_out_fd, std::move(options)) {} 58 | 59 | void StdOut::Reset(Options options) { 60 | FdWriter::Reset(std_out_fd, std::move(options)); 61 | } 62 | 63 | StdErr::StdErr(Options options) : FdWriter(std_err_fd, std::move(options)) {} 64 | 65 | void StdErr::Reset(Options options) { 66 | FdWriter::Reset(std_err_fd, std::move(options)); 67 | } 68 | // Makes StdIn use fd, setting aside the previous descriptor and any pending buffer. 69 | InjectedStdInFd::InjectedStdInFd(int fd) 70 | : old_fd_(std::exchange(std_in_fd, fd)), 71 | old_pending_(std::move(StdInPending())) {} 72 | // Puts back the descriptor and pending buffer set aside by the constructor. 73 | InjectedStdInFd::~InjectedStdInFd() { 74 | std_in_fd = old_fd_; 75 | StdInPending() = std::move(old_pending_); 76 | } 77 | // Makes StdOut use fd, remembering the previous descriptor in old_fd_. 78 | InjectedStdOutFd::InjectedStdOutFd(int fd) 79 | : old_fd_(std::exchange(std_out_fd, fd)) {} 80 | 81 |
InjectedStdOutFd::~InjectedStdOutFd() { std_out_fd = old_fd_; } 82 | // Makes StdErr use fd, remembering the previous descriptor in old_fd_. 83 | InjectedStdErrFd::InjectedStdErrFd(int fd) 84 | : old_fd_(std::exchange(std_err_fd, fd)) {} 85 | // Puts back the descriptor remembered by the constructor. 86 | InjectedStdErrFd::~InjectedStdErrFd() { std_err_fd = old_fd_; } 87 | 88 | } // namespace riegeli 89 | -------------------------------------------------------------------------------- /riegeli/base/initializer_internal.h: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef RIEGELI_BASE_INITIALIZER_INTERNAL_H_ 16 | #define RIEGELI_BASE_INITIALIZER_INTERNAL_H_ 17 | 18 | #include 19 | 20 | #include 21 | #include 22 | 23 | #include "absl/base/casts.h" 24 | #include "absl/base/nullability.h" 25 | 26 | ABSL_POINTERS_DEFAULT_NONNULL 27 | 28 | namespace riegeli::initializer_internal { 29 | 30 | // `CanBindReference::value` is `true` if `Arg&&` can be implicitly 31 | // converted to `T&&` without creating a temporary. 32 | // 33 | // Due to not all compilers implementing http://wg21.link/cwg2352 (converting 34 | // `T*&` to `const T* const&` could have bound the result to a temporary), 35 | // this covers also the case when the corresponding pointers can be converted. 36 | // `BindReference()` should be used for the actual conversion.
37 | // NOTE(review): template parameter lists and specialization arguments (everything between angle brackets) appear to be missing from this listing; confirm against the original header before editing. 38 | template 39 | struct CanBindReference : std::false_type {}; 40 | 41 | template 42 | struct CanBindReference : std::is_convertible {}; 43 | 44 | template 45 | struct CanBindReference : std::false_type {}; 46 | 47 | template 48 | struct CanBindReference : std::is_convertible { 49 | }; 50 | 51 | template 52 | struct CanBindReference : std::false_type {}; 53 | 54 | template 55 | struct CanBindReference : std::is_convertible {}; 56 | 57 | // `BindReference(arg)` returns `arg` implicitly converted to `T&&`. 58 | // 59 | // Due to not all compilers implementing http://wg21.link/cwg2352 (converting 60 | // `T*&` to `const T* const&` could have bound the result to a temporary), 61 | // this is not implemented as a simple implicit conversion, but by converting 62 | // the reference to a pointer, implicitly converting the pointer, and 63 | // dereferencing back. 64 | template ::value, int> = 0> 66 | inline T&& BindReference(Arg&& arg) { 67 | return std::forward( 68 | *absl::implicit_cast*>(&arg)); 69 | } 70 | 71 | } // namespace riegeli::initializer_internal 72 | 73 | #endif // RIEGELI_BASE_INITIALIZER_INTERNAL_H_ 74 | -------------------------------------------------------------------------------- /riegeli/lz4/BUILD: -------------------------------------------------------------------------------- 1 | load("@rules_cc//cc:defs.bzl", "cc_library") 2 | 3 | package( 4 | default_visibility = ["//visibility:public"], 5 | features = ["header_modules"], 6 | ) 7 | 8 | licenses(["notice"]) 9 | 10 | cc_library( 11 | name = "lz4_reader", 12 | srcs = ["lz4_reader.cc"], 13 | hdrs = ["lz4_reader.h"], 14 | # lz4_reader.cc has #define before #include to influence what the included 15 | # files provide.
16 | features = ["-use_header_modules"], 17 | deps = [ 18 | ":lz4_dictionary", 19 | "//riegeli/base:arithmetic", 20 | "//riegeli/base:assert", 21 | "//riegeli/base:dependency", 22 | "//riegeli/base:initializer", 23 | "//riegeli/base:object", 24 | "//riegeli/base:recycling_pool", 25 | "//riegeli/base:status", 26 | "//riegeli/base:types", 27 | "//riegeli/bytes:buffer_options", 28 | "//riegeli/bytes:buffered_reader", 29 | "//riegeli/bytes:reader", 30 | "@com_google_absl//absl/base:core_headers", 31 | "@com_google_absl//absl/status", 32 | "@com_google_absl//absl/strings", 33 | "@com_google_absl//absl/strings:string_view", 34 | "@lz4//:lz4_frame", 35 | ], 36 | ) 37 | 38 | cc_library( 39 | name = "lz4_writer", 40 | srcs = ["lz4_writer.cc"], 41 | hdrs = ["lz4_writer.h"], 42 | # lz4_writer.cc has #define before #include to influence what the included 43 | # files provide. 44 | features = ["-use_header_modules"], 45 | deps = [ 46 | ":lz4_dictionary", 47 | ":lz4_reader", 48 | "//riegeli/base:arithmetic", 49 | "//riegeli/base:assert", 50 | "//riegeli/base:dependency", 51 | "//riegeli/base:initializer", 52 | "//riegeli/base:object", 53 | "//riegeli/base:recycling_pool", 54 | "//riegeli/base:status", 55 | "//riegeli/base:types", 56 | "//riegeli/bytes:buffer_options", 57 | "//riegeli/bytes:buffered_writer", 58 | "//riegeli/bytes:reader", 59 | "//riegeli/bytes:writer", 60 | "@com_google_absl//absl/base:core_headers", 61 | "@com_google_absl//absl/status", 62 | "@com_google_absl//absl/strings", 63 | "@com_google_absl//absl/strings:string_view", 64 | "@lz4", 65 | "@lz4//:lz4_frame", 66 | ], 67 | ) 68 | 69 | cc_library( 70 | name = "lz4_dictionary", 71 | srcs = ["lz4_dictionary.cc"], 72 | hdrs = ["lz4_dictionary.h"], 73 | # lz4_dictionary.cc has #define before #include to influence what the 74 | # included files provide. 
75 | features = ["-use_header_modules"], 76 | visibility = ["//visibility:private"], 77 | deps = [ 78 | "//riegeli/base:bytes_ref", 79 | "//riegeli/base:initializer", 80 | "//riegeli/base:shared_ptr", 81 | "@com_google_absl//absl/base", 82 | "@com_google_absl//absl/base:core_headers", 83 | "@com_google_absl//absl/strings:string_view", 84 | "@lz4//:lz4_frame", 85 | ], 86 | ) 87 | -------------------------------------------------------------------------------- /riegeli/bzip2/bzip2_error.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #include "riegeli/bzip2/bzip2_error.h" 16 | 17 | #include 18 | 19 | #include "absl/status/status.h" 20 | #include "absl/strings/str_cat.h" 21 | #include "absl/strings/string_view.h" 22 | #include "bzlib.h" 23 | #include "riegeli/base/assert.h" 24 | 25 | namespace riegeli::bzip2_internal { 26 | // Converts a bzlib return code to absl::Status: the OK codes give OkStatus(); any other code gives the message "<operation> failed: <details>" with kInvalidArgument (data errors), kResourceExhausted (memory error) or kInternal (everything else). 27 | absl::Status Bzip2ErrorToStatus(absl::string_view operation, int bzlib_code) { 28 | absl::StatusCode code; 29 | switch (bzlib_code) { 30 | case BZ_OK: 31 | case BZ_RUN_OK: 32 | case BZ_FLUSH_OK: 33 | case BZ_FINISH_OK: 34 | return absl::OkStatus(); 35 | case BZ_DATA_ERROR: 36 | case BZ_DATA_ERROR_MAGIC: 37 | code = absl::StatusCode::kInvalidArgument; 38 | break; 39 | case BZ_MEM_ERROR: 40 | code = absl::StatusCode::kResourceExhausted; 41 | break; 42 | default: 43 | // Should not happen. 44 | code = absl::StatusCode::kInternal; 45 | break; 46 | } 47 | std::string message = absl::StrCat(operation, " failed"); 48 | absl::string_view details; 49 | switch (bzlib_code) { 50 | case BZ_OK: 51 | case BZ_RUN_OK: 52 | case BZ_FLUSH_OK: 53 | case BZ_FINISH_OK: 54 | RIEGELI_ASSUME_UNREACHABLE() << "Handled before switch"; 55 | case BZ_STREAM_END: 56 | details = "stream end"; 57 | break; 58 | case BZ_SEQUENCE_ERROR: 59 | details = "sequence error"; 60 | break; 61 | case BZ_PARAM_ERROR: 62 | details = "parameter error"; 63 | break; 64 | case BZ_MEM_ERROR: 65 | details = "memory error"; 66 | break; 67 | case BZ_DATA_ERROR: 68 | details = "data error"; 69 | break; 70 | case BZ_DATA_ERROR_MAGIC: 71 | details = "data error (magic)"; 72 | break; 73 | case BZ_IO_ERROR: 74 | details = "I/O error"; 75 | break; 76 | case BZ_UNEXPECTED_EOF: 77 | details = "unexpected EOF"; 78 | break; 79 | case BZ_OUTBUFF_FULL: 80 | details = "output buffer full"; 81 | break; 82 | case BZ_CONFIG_ERROR: 83 | details = "config error"; 84 | break; 85 | default: 86 | absl::StrAppend(&message, ": unknown bzlib error code: ", bzlib_code); 87 | break; 88 | } 89 | if (!details.empty())
absl::StrAppend(&message, ": ", details); 90 | return absl::Status(code, message); 91 | } 92 | 93 | } // namespace riegeli::bzip2_internal 94 | -------------------------------------------------------------------------------- /riegeli/lines/BUILD: -------------------------------------------------------------------------------- 1 | load("@rules_cc//cc:defs.bzl", "cc_library") 2 | 3 | package( 4 | default_visibility = ["//visibility:public"], 5 | features = ["header_modules"], 6 | ) 7 | 8 | licenses(["notice"]) 9 | 10 | cc_library( 11 | name = "newline", 12 | hdrs = ["newline.h"], 13 | deps = ["@com_google_absl//absl/strings:string_view"], 14 | ) 15 | 16 | cc_library( 17 | name = "line_reading", 18 | srcs = ["line_reading.cc"], 19 | hdrs = ["line_reading.h"], 20 | deps = [ 21 | ":newline", 22 | "//riegeli/base:arithmetic", 23 | "//riegeli/base:assert", 24 | "//riegeli/base:chain", 25 | "//riegeli/bytes:reader", 26 | "@com_google_absl//absl/base:core_headers", 27 | "@com_google_absl//absl/status", 28 | "@com_google_absl//absl/strings", 29 | "@com_google_absl//absl/strings:cord", 30 | "@com_google_absl//absl/strings:string_view", 31 | ], 32 | ) 33 | 34 | cc_library( 35 | name = "line_writing", 36 | hdrs = ["line_writing.h"], 37 | deps = [ 38 | ":newline", 39 | "//riegeli/base:assert", 40 | "//riegeli/base:type_traits", 41 | "//riegeli/bytes:stringify", 42 | "//riegeli/bytes:writer", 43 | "@com_google_absl//absl/base:core_headers", 44 | ], 45 | ) 46 | 47 | cc_library( 48 | name = "text_reader", 49 | srcs = ["text_reader.cc"], 50 | hdrs = ["text_reader.h"], 51 | deps = [ 52 | ":newline", 53 | "//riegeli/base:any", 54 | "//riegeli/base:arithmetic", 55 | "//riegeli/base:assert", 56 | "//riegeli/base:dependency", 57 | "//riegeli/base:initializer", 58 | "//riegeli/base:object", 59 | "//riegeli/base:status", 60 | "//riegeli/base:types", 61 | "//riegeli/bytes:buffer_options", 62 | "//riegeli/bytes:buffered_reader", 63 | "//riegeli/bytes:prefix_limiting_reader", 64 |
"//riegeli/bytes:reader", 65 | "@com_google_absl//absl/base:core_headers", 66 | "@com_google_absl//absl/status", 67 | "@com_google_absl//absl/strings", 68 | ], 69 | ) 70 | 71 | cc_library( 72 | name = "text_writer", 73 | srcs = ["text_writer.cc"], 74 | hdrs = ["text_writer.h"], 75 | deps = [ 76 | ":line_writing", 77 | ":newline", 78 | "//riegeli/base:any", 79 | "//riegeli/base:arithmetic", 80 | "//riegeli/base:assert", 81 | "//riegeli/base:dependency", 82 | "//riegeli/base:initializer", 83 | "//riegeli/base:object", 84 | "//riegeli/base:status", 85 | "//riegeli/base:types", 86 | "//riegeli/bytes:buffer_options", 87 | "//riegeli/bytes:buffered_writer", 88 | "//riegeli/bytes:prefix_limiting_writer", 89 | "//riegeli/bytes:writer", 90 | "@com_google_absl//absl/base:core_headers", 91 | "@com_google_absl//absl/status", 92 | "@com_google_absl//absl/strings", 93 | "@com_google_absl//absl/strings:string_view", 94 | ], 95 | ) 96 | -------------------------------------------------------------------------------- /riegeli/gcs/BUILD: -------------------------------------------------------------------------------- 1 | load("@rules_cc//cc:defs.bzl", "cc_library") 2 | 3 | package( 4 | default_visibility = ["//visibility:public"], 5 | features = ["header_modules"], 6 | ) 7 | 8 | licenses(["notice"]) 9 | 10 | cc_library( 11 | name = "gcs_object", 12 | srcs = ["gcs_object.cc"], 13 | hdrs = ["gcs_object.h"], 14 | deps = [ 15 | "//riegeli/base:arithmetic", 16 | "//riegeli/base:compare", 17 | "//riegeli/base:debug", 18 | "//riegeli/base:global", 19 | "//riegeli/base:reset", 20 | "//riegeli/base:string_ref", 21 | "//riegeli/bytes:ostream_writer", 22 | "//riegeli/bytes:string_writer", 23 | "//riegeli/bytes:stringify", 24 | "//riegeli/bytes:stringify_writer", 25 | "//riegeli/bytes:writer", 26 | "@com_google_absl//absl/base:core_headers", 27 | "@com_google_absl//absl/status", 28 | "@com_google_absl//absl/strings", 29 | "@com_google_absl//absl/strings:string_view", 30 | ], 31 | ) 32 | 33 | 
cc_library( 34 | name = "gcs_reader", 35 | srcs = [ 36 | "gcs_internal.h", 37 | "gcs_reader.cc", 38 | ], 39 | hdrs = ["gcs_reader.h"], 40 | deps = [ 41 | ":gcs_object", 42 | "//riegeli/base:arithmetic", 43 | "//riegeli/base:assert", 44 | "//riegeli/base:initializer", 45 | "//riegeli/base:object", 46 | "//riegeli/base:reset", 47 | "//riegeli/base:status", 48 | "//riegeli/base:type_traits", 49 | "//riegeli/base:types", 50 | "//riegeli/bytes:buffer_options", 51 | "//riegeli/bytes:istream_reader", 52 | "//riegeli/bytes:reader", 53 | "@com_google_absl//absl/base:core_headers", 54 | "@com_google_absl//absl/status", 55 | "@com_google_absl//absl/strings", 56 | "@com_google_absl//absl/strings:string_view", 57 | "@google_cloud_cpp//:common", 58 | "@google_cloud_cpp//:storage", 59 | ], 60 | ) 61 | 62 | cc_library( 63 | name = "gcs_writer", 64 | srcs = [ 65 | "gcs_internal.h", 66 | "gcs_writer.cc", 67 | ], 68 | hdrs = ["gcs_writer.h"], 69 | deps = [ 70 | ":gcs_object", 71 | ":gcs_reader", 72 | "//riegeli/base:arithmetic", 73 | "//riegeli/base:assert", 74 | "//riegeli/base:initializer", 75 | "//riegeli/base:object", 76 | "//riegeli/base:reset", 77 | "//riegeli/base:status", 78 | "//riegeli/base:string_ref", 79 | "//riegeli/base:type_traits", 80 | "//riegeli/base:types", 81 | "//riegeli/bytes:buffer_options", 82 | "//riegeli/bytes:ostream_writer", 83 | "//riegeli/bytes:reader", 84 | "//riegeli/bytes:writer", 85 | "@com_google_absl//absl/base:core_headers", 86 | "@com_google_absl//absl/status", 87 | "@com_google_absl//absl/strings", 88 | "@com_google_absl//absl/strings:string_view", 89 | "@google_cloud_cpp//:common", 90 | "@google_cloud_cpp//:storage", 91 | ], 92 | ) 93 | -------------------------------------------------------------------------------- /riegeli/zstd/BUILD: -------------------------------------------------------------------------------- 1 | load("@rules_cc//cc:defs.bzl", "cc_library") 2 | 3 | package( 4 | default_visibility = ["//visibility:public"], 5 | features = 
["header_modules"], 6 | ) 7 | 8 | licenses(["notice"]) 9 | 10 | cc_library( 11 | name = "zstd_reader", 12 | srcs = ["zstd_reader.cc"], 13 | hdrs = ["zstd_reader.h"], 14 | # zstd_reader.cc has #define before #include to influence what the included 15 | # files provide. 16 | features = ["-use_header_modules"], 17 | deps = [ 18 | ":zstd_dictionary", 19 | "//riegeli/base:arithmetic", 20 | "//riegeli/base:assert", 21 | "//riegeli/base:dependency", 22 | "//riegeli/base:initializer", 23 | "//riegeli/base:object", 24 | "//riegeli/base:recycling_pool", 25 | "//riegeli/base:status", 26 | "//riegeli/base:types", 27 | "//riegeli/bytes:buffer_options", 28 | "//riegeli/bytes:buffered_reader", 29 | "//riegeli/bytes:reader", 30 | "@com_google_absl//absl/base:core_headers", 31 | "@com_google_absl//absl/status", 32 | "@com_google_absl//absl/strings", 33 | "@net_zstd//:zstd", 34 | ], 35 | ) 36 | 37 | cc_library( 38 | name = "zstd_writer", 39 | srcs = ["zstd_writer.cc"], 40 | hdrs = ["zstd_writer.h"], 41 | # zstd_writer.cc has #define before #include to influence what the included 42 | # files provide. 
43 | features = ["-use_header_modules"], 44 | deps = [ 45 | ":zstd_dictionary", 46 | ":zstd_reader", 47 | "//riegeli/base:arithmetic", 48 | "//riegeli/base:assert", 49 | "//riegeli/base:dependency", 50 | "//riegeli/base:initializer", 51 | "//riegeli/base:object", 52 | "//riegeli/base:recycling_pool", 53 | "//riegeli/base:status", 54 | "//riegeli/base:types", 55 | "//riegeli/bytes:buffer_options", 56 | "//riegeli/bytes:buffered_writer", 57 | "//riegeli/bytes:reader", 58 | "//riegeli/bytes:writer", 59 | "@com_google_absl//absl/base:core_headers", 60 | "@com_google_absl//absl/status", 61 | "@com_google_absl//absl/strings", 62 | "@com_google_absl//absl/strings:string_view", 63 | "@net_zstd//:zstd", 64 | ], 65 | ) 66 | 67 | cc_library( 68 | name = "zstd_dictionary", 69 | srcs = ["zstd_dictionary.cc"], 70 | hdrs = ["zstd_dictionary.h"], 71 | # zstd_dictionary.cc has #define before #include to influence what the 72 | # included files provide. 73 | features = ["-use_header_modules"], 74 | visibility = ["//visibility:private"], 75 | deps = [ 76 | "//riegeli/base:arithmetic", 77 | "//riegeli/base:bytes_ref", 78 | "//riegeli/base:initializer", 79 | "//riegeli/base:shared_ptr", 80 | "@com_google_absl//absl/base", 81 | "@com_google_absl//absl/base:core_headers", 82 | "@com_google_absl//absl/strings:string_view", 83 | "@com_google_absl//absl/synchronization", 84 | "@net_zstd//:zstd", 85 | ], 86 | ) 87 | -------------------------------------------------------------------------------- /riegeli/brotli/BUILD: -------------------------------------------------------------------------------- 1 | load("@rules_cc//cc:defs.bzl", "cc_library") 2 | 3 | package( 4 | default_visibility = ["//visibility:public"], 5 | features = ["header_modules"], 6 | ) 7 | 8 | licenses(["notice"]) 9 | 10 | cc_library( 11 | name = "brotli_reader", 12 | srcs = ["brotli_reader.cc"], 13 | hdrs = ["brotli_reader.h"], 14 | deps = [ 15 | ":brotli_allocator", 16 | ":brotli_dictionary", 17 | 
"//riegeli/base:arithmetic", 18 | "//riegeli/base:assert", 19 | "//riegeli/base:dependency", 20 | "//riegeli/base:initializer", 21 | "//riegeli/base:object", 22 | "//riegeli/base:shared_ptr", 23 | "//riegeli/base:status", 24 | "//riegeli/base:types", 25 | "//riegeli/bytes:pullable_reader", 26 | "//riegeli/bytes:reader", 27 | "@com_google_absl//absl/base:core_headers", 28 | "@com_google_absl//absl/status", 29 | "@com_google_absl//absl/strings", 30 | "@com_google_absl//absl/strings:string_view", 31 | "@org_brotli//:brotlicommon", 32 | "@org_brotli//:brotlidec", 33 | ], 34 | ) 35 | 36 | cc_library( 37 | name = "brotli_writer", 38 | srcs = ["brotli_writer.cc"], 39 | hdrs = ["brotli_writer.h"], 40 | deps = [ 41 | ":brotli_allocator", 42 | ":brotli_dictionary", 43 | ":brotli_reader", 44 | "//riegeli/base:arithmetic", 45 | "//riegeli/base:assert", 46 | "//riegeli/base:dependency", 47 | "//riegeli/base:initializer", 48 | "//riegeli/base:object", 49 | "//riegeli/base:shared_ptr", 50 | "//riegeli/base:status", 51 | "//riegeli/base:types", 52 | "//riegeli/bytes:buffer_options", 53 | "//riegeli/bytes:buffered_writer", 54 | "//riegeli/bytes:reader", 55 | "//riegeli/bytes:writer", 56 | "@com_google_absl//absl/base:core_headers", 57 | "@com_google_absl//absl/status", 58 | "@com_google_absl//absl/strings", 59 | "@com_google_absl//absl/strings:string_view", 60 | "@org_brotli//:brotlienc", 61 | ], 62 | ) 63 | 64 | cc_library( 65 | name = "brotli_dictionary", 66 | srcs = ["brotli_dictionary.cc"], 67 | hdrs = ["brotli_dictionary.h"], 68 | visibility = ["//visibility:private"], 69 | deps = [ 70 | "//riegeli/base:assert", 71 | "//riegeli/base:bytes_ref", 72 | "//riegeli/base:initializer", 73 | "//riegeli/base:shared_ptr", 74 | "@com_google_absl//absl/base", 75 | "@com_google_absl//absl/base:core_headers", 76 | "@com_google_absl//absl/strings:string_view", 77 | "@com_google_absl//absl/types:span", 78 | "@org_brotli//:brotlicommon", 79 | "@org_brotli//:brotlienc", 80 | ], 81 | ) 82 | 83 | 
cc_library( 84 | name = "brotli_allocator", 85 | srcs = ["brotli_allocator.cc"], 86 | hdrs = ["brotli_allocator.h"], 87 | visibility = ["//visibility:private"], 88 | deps = [ 89 | "//riegeli/base:shared_ptr", 90 | "@org_brotli//:brotlicommon", 91 | ], 92 | ) 93 | -------------------------------------------------------------------------------- /riegeli/tensorflow/io/BUILD: -------------------------------------------------------------------------------- 1 | load("@rules_cc//cc:defs.bzl", "cc_library") 2 | 3 | package( 4 | default_visibility = ["//visibility:public"], 5 | features = ["header_modules"], 6 | ) 7 | 8 | licenses(["notice"]) 9 | 10 | cc_library( 11 | name = "file_reader", 12 | srcs = ["file_reader.cc"], 13 | hdrs = ["file_reader.h"], 14 | deps = [ 15 | "//riegeli/base:arithmetic", 16 | "//riegeli/base:assert", 17 | "//riegeli/base:buffering", 18 | "//riegeli/base:chain", 19 | "//riegeli/base:dependency", 20 | "//riegeli/base:external_ref", 21 | "//riegeli/base:initializer", 22 | "//riegeli/base:null_safe_memcpy", 23 | "//riegeli/base:object", 24 | "//riegeli/base:reset", 25 | "//riegeli/base:sized_shared_buffer", 26 | "//riegeli/base:status", 27 | "//riegeli/base:types", 28 | "//riegeli/bytes:backward_writer", 29 | "//riegeli/bytes:buffer_options", 30 | "//riegeli/bytes:path_ref", 31 | "//riegeli/bytes:reader", 32 | "//riegeli/bytes:writer", 33 | "@com_google_absl//absl/base:core_headers", 34 | "@com_google_absl//absl/functional:function_ref", 35 | "@com_google_absl//absl/status", 36 | "@com_google_absl//absl/strings", 37 | "@com_google_absl//absl/strings:cord", 38 | "@com_google_absl//absl/strings:string_view", 39 | "@com_google_absl//absl/types:span", 40 | "@local_config_tf//:tf_header_lib", 41 | ], 42 | ) 43 | 44 | cc_library( 45 | name = "file_writer", 46 | srcs = ["file_writer.cc"], 47 | hdrs = ["file_writer.h"], 48 | deps = [ 49 | ":file_reader", 50 | "//riegeli/base:arithmetic", 51 | "//riegeli/base:assert", 52 | "//riegeli/base:buffering", 53 | 
"//riegeli/base:byte_fill", 54 | "//riegeli/base:chain", 55 | "//riegeli/base:dependency", 56 | "//riegeli/base:external_ref", 57 | "//riegeli/base:initializer", 58 | "//riegeli/base:object", 59 | "//riegeli/base:reset", 60 | "//riegeli/base:shared_buffer", 61 | "//riegeli/base:status", 62 | "//riegeli/base:types", 63 | "//riegeli/bytes:buffer_options", 64 | "//riegeli/bytes:path_ref", 65 | "//riegeli/bytes:reader", 66 | "//riegeli/bytes:writer", 67 | "@com_google_absl//absl/base:core_headers", 68 | "@com_google_absl//absl/status", 69 | "@com_google_absl//absl/strings", 70 | "@com_google_absl//absl/strings:cord", 71 | "@com_google_absl//absl/strings:string_view", 72 | "@local_config_tf//:tf_header_lib", 73 | ], 74 | ) 75 | 76 | cc_library( 77 | name = "tstring_writer", 78 | hdrs = ["tstring_writer.h"], 79 | deps = [ 80 | "//riegeli/base:arithmetic", 81 | "//riegeli/base:assert", 82 | "//riegeli/base:dependency", 83 | "//riegeli/base:initializer", 84 | "//riegeli/base:object", 85 | "//riegeli/bytes:resizable_writer", 86 | "@local_config_tf//:tf_header_lib", 87 | ], 88 | ) 89 | -------------------------------------------------------------------------------- /python/riegeli/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """Writes or reads Riegeli/records files.""" 15 | 16 | from riegeli.base import riegeli_error 17 | from riegeli.records import record_position 18 | from riegeli.records import record_reader 19 | from riegeli.records import record_writer 20 | from riegeli.records import records_metadata_pb2 21 | from riegeli.records import skipped_region 22 | # Names exported by `from riegeli import *`; each one is bound further down in this module. 23 | __all__ = ( 24 | 'RiegeliError', 25 | 'CancelledError', 26 | 'UnknownError', 27 | 'InvalidArgumentError', 28 | 'DeadlineExceededError', 29 | 'NotFoundError', 30 | 'AlreadyExistsError', 31 | 'PermissionDeniedError', 32 | 'UnauthenticatedError', 33 | 'ResourceExhaustedError', 34 | 'FailedPreconditionError', 35 | 'AbortedError', 36 | 'OutOfRangeError', 37 | 'UnimplementedError', 38 | 'InternalError', 39 | 'UnavailableError', 40 | 'DataLossError', 41 | 'FlushType', 42 | 'RecordPosition', 43 | 'SkippedRegion', 44 | 'RecordsMetadata', 45 | 'set_record_type', 46 | 'RecordWriter', 47 | 'EXISTENCE_ONLY', 48 | 'get_record_type', 49 | 'RecordReader', 50 | ) 51 | # Re-export the public names from the submodules imported above at package level. 52 | # pylint: disable=invalid-name 53 | RiegeliError = riegeli_error.RiegeliError 54 | CancelledError = riegeli_error.CancelledError 55 | UnknownError = riegeli_error.UnknownError 56 | InvalidArgumentError = riegeli_error.InvalidArgumentError 57 | DeadlineExceededError = riegeli_error.DeadlineExceededError 58 | NotFoundError = riegeli_error.NotFoundError 59 | AlreadyExistsError = riegeli_error.AlreadyExistsError 60 | PermissionDeniedError = riegeli_error.PermissionDeniedError 61 | UnauthenticatedError = riegeli_error.UnauthenticatedError 62 | ResourceExhaustedError = riegeli_error.ResourceExhaustedError 63 | FailedPreconditionError = riegeli_error.FailedPreconditionError 64 | AbortedError = riegeli_error.AbortedError 65 | OutOfRangeError = riegeli_error.OutOfRangeError 66 | UnimplementedError = riegeli_error.UnimplementedError 67 | InternalError = riegeli_error.InternalError 68 | UnavailableError = riegeli_error.UnavailableError 69 | DataLossError =
riegeli_error.DataLossError 70 | RecordPosition = record_position.RecordPosition 71 | SkippedRegion = skipped_region.SkippedRegion 72 | RecordsMetadata = records_metadata_pb2.RecordsMetadata 73 | FlushType = record_writer.FlushType 74 | set_record_type = record_writer.set_record_type 75 | RecordWriter = record_writer.RecordWriter 76 | EXISTENCE_ONLY = record_reader.EXISTENCE_ONLY 77 | get_record_type = record_reader.get_record_type 78 | RecordReader = record_reader.RecordReader 79 | -------------------------------------------------------------------------------- /riegeli/csv/BUILD: -------------------------------------------------------------------------------- 1 | load("@rules_cc//cc:defs.bzl", "cc_library") 2 | 3 | package( 4 | default_visibility = ["//visibility:public"], 5 | features = ["header_modules"], 6 | ) 7 | 8 | licenses(["notice"]) 9 | 10 | cc_library( 11 | name = "csv_reader", 12 | srcs = ["csv_reader.cc"], 13 | hdrs = ["csv_reader.h"], 14 | deps = [ 15 | ":csv_record", 16 | "//riegeli/base:arithmetic", 17 | "//riegeli/base:assert", 18 | "//riegeli/base:debug", 19 | "//riegeli/base:dependency", 20 | "//riegeli/base:initializer", 21 | "//riegeli/base:object", 22 | "//riegeli/base:reset", 23 | "//riegeli/base:status", 24 | "//riegeli/bytes:reader", 25 | "//riegeli/bytes:string_reader", 26 | "//riegeli/bytes:string_writer", 27 | "//riegeli/lines:line_reading", 28 | "@com_google_absl//absl/base:core_headers", 29 | "@com_google_absl//absl/status", 30 | "@com_google_absl//absl/strings", 31 | "@com_google_absl//absl/strings:string_view", 32 | "@com_google_absl//absl/types:span", 33 | ], 34 | ) 35 | 36 | cc_library( 37 | name = "csv_writer", 38 | srcs = ["csv_writer.cc"], 39 | hdrs = ["csv_writer.h"], 40 | deps = [ 41 | ":csv_record", 42 | "//riegeli/base:arithmetic", 43 | "//riegeli/base:assert", 44 | "//riegeli/base:debug", 45 | "//riegeli/base:dependency", 46 | "//riegeli/base:initializer", 47 | "//riegeli/base:iterable", 48 | "//riegeli/base:object", 49 |
"//riegeli/base:reset", 50 | "//riegeli/base:status", 51 | "//riegeli/bytes:string_writer", 52 | "//riegeli/bytes:writer", 53 | "//riegeli/lines:line_writing", 54 | "//riegeli/lines:newline", 55 | "@com_google_absl//absl/base:core_headers", 56 | "@com_google_absl//absl/status", 57 | "@com_google_absl//absl/strings", 58 | "@com_google_absl//absl/strings:string_view", 59 | ], 60 | ) 61 | 62 | cc_library( 63 | name = "csv_record", 64 | srcs = ["csv_record.cc"], 65 | hdrs = ["csv_record.h"], 66 | deps = [ 67 | "//riegeli/base:arithmetic", 68 | "//riegeli/base:assert", 69 | "//riegeli/base:compare", 70 | "//riegeli/base:global", 71 | "//riegeli/base:initializer", 72 | "//riegeli/base:iterable", 73 | "//riegeli/base:reset", 74 | "//riegeli/base:shared_ptr", 75 | "//riegeli/base:string_ref", 76 | "//riegeli/base:type_traits", 77 | "//riegeli/bytes:ostream_writer", 78 | "//riegeli/bytes:string_writer", 79 | "//riegeli/bytes:stringify_writer", 80 | "//riegeli/bytes:writer", 81 | "@com_google_absl//absl/base", 82 | "@com_google_absl//absl/base:core_headers", 83 | "@com_google_absl//absl/container:flat_hash_map", 84 | "@com_google_absl//absl/status", 85 | "@com_google_absl//absl/strings", 86 | "@com_google_absl//absl/strings:string_view", 87 | "@com_google_absl//absl/synchronization", 88 | "@com_google_absl//absl/types:span", 89 | ], 90 | ) 91 | --------------------------------------------------------------------------------