├── python
├── __init__.py
├── dummy_binary.py
├── riegeli
│ ├── base
│ │ ├── __init__.py
│ │ └── BUILD
│ ├── records
│ │ ├── __init__.py
│ │ ├── examples
│ │ │ ├── __init__.py
│ │ │ ├── BUILD
│ │ │ └── write_read_records.py
│ │ ├── tests
│ │ │ ├── __init__.py
│ │ │ ├── records_test.proto
│ │ │ └── BUILD
│ │ ├── records_metadata.proto
│ │ ├── skipped_region.py
│ │ └── record_position.h
│ ├── tensorflow
│ │ ├── __init__.py
│ │ ├── ops
│ │ │ └── __init__.py
│ │ ├── kernel_tests
│ │ │ └── __init__.py
│ │ └── BUILD
│ ├── BUILD
│ ├── BUILD.tpl
│ ├── py_extension.bzl
│ ├── bytes
│ │ └── BUILD
│ └── __init__.py
├── MANIFEST.in
├── README.md
├── BUILD
├── build_pip_package.sh
└── setup.py
├── tf_dependency
├── BUILD
└── BUILD.tpl
├── riegeli
├── .gitignore
├── tensorflow
│ ├── BUILD
│ ├── ops
│ │ └── riegeli_dataset_ops.cc
│ └── io
│ │ └── BUILD
├── BUILD
├── records
│ ├── README.md
│ ├── skipped_region.cc
│ ├── records_metadata.proto
│ └── tools
│ │ ├── riegeli_summary.proto
│ │ ├── tfrecord_recognizer.h
│ │ └── tfrecord_recognizer.cc
├── chunk_encoding
│ ├── README.md
│ ├── hash.h
│ ├── constants.h
│ ├── decompressor.cc
│ ├── brotli_encoder_selection.h
│ ├── chunk_encoder.cc
│ ├── hash.cc
│ ├── chunk.cc
│ └── brotli_encoder_selection.cc
├── bzip2
│ ├── bzip2_error.h
│ ├── BUILD
│ └── bzip2_error.cc
├── base
│ ├── chain.h
│ ├── external_ref.h
│ ├── string_utils.h
│ ├── external_data.cc
│ ├── port.h
│ ├── constexpr.h
│ ├── buffer.cc
│ ├── errno_mapping.h
│ ├── string_utils.cc
│ ├── shared_buffer.cc
│ ├── status.cc
│ ├── unicode.h
│ ├── stream_utils.cc
│ ├── closing_ptr.h
│ ├── parallelism.h
│ ├── ownership.h
│ ├── assert.cc
│ ├── estimated_allocated_size.h
│ ├── external_data.h
│ ├── types.h
│ ├── memory_estimator.cc
│ ├── parallelism.cc
│ ├── cord_iterator_span.cc
│ ├── type_id.h
│ └── initializer_internal.h
├── xz
│ ├── xz_error.h
│ └── BUILD
├── varint
│ ├── BUILD
│ └── varint_internal.h
├── ordered_varint
│ ├── BUILD
│ └── ordered_varint_internal.h
├── bytes
│ ├── cfile_internal.cc
│ ├── cfile_internal.h
│ ├── fd_internal.h
│ ├── iostream_internal.h
│ ├── fd_internal.cc
│ ├── string_reader.cc
│ ├── fd_internal_for_cc.h
│ ├── array_backward_writer.cc
│ └── std_io.cc
├── zlib
│ ├── zlib_error.h
│ ├── zlib_error.cc
│ └── BUILD
├── brotli
│ ├── brotli_allocator.cc
│ ├── brotli_dictionary.cc
│ └── BUILD
├── digests
│ ├── sha256_digester.h
│ ├── sha512_digester.h
│ ├── sha512_256_digester.h
│ ├── md5_digester.h
│ ├── sha1_digester.h
│ ├── adler32_digester.h
│ ├── adler32_digester.cc
│ ├── crc32_digester.cc
│ ├── highwayhash_digester.cc
│ ├── crc32_digester.h
│ └── openssl_digester.h
├── endian
│ └── BUILD
├── messages
│ ├── map_entry_field.h
│ └── message_wire_format.h
├── lines
│ ├── newline.h
│ └── BUILD
├── lz4
│ ├── lz4_dictionary.cc
│ └── BUILD
├── snappy
│ ├── hadoop
│ │ └── BUILD
│ ├── framed
│ │ └── BUILD
│ ├── snappy_streams.h
│ └── BUILD
├── gcs
│ ├── gcs_internal.h
│ └── BUILD
├── containers
│ └── BUILD
├── text
│ └── BUILD
├── zstd
│ └── BUILD
└── csv
│ └── BUILD
├── MANIFEST.in
├── doc
└── index.md
├── .bazelrc
├── README.md
├── CONTRIBUTING.md
├── configure
└── MODULE.bazel
/python/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python/dummy_binary.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tf_dependency/BUILD:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python/riegeli/base/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/riegeli/.gitignore:
--------------------------------------------------------------------------------
1 | bazel-*
2 |
--------------------------------------------------------------------------------
/python/riegeli/records/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python/riegeli/tensorflow/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python/riegeli/records/examples/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python/riegeli/records/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python/riegeli/tensorflow/ops/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include riegeli *.py
2 |
--------------------------------------------------------------------------------
/python/riegeli/tensorflow/kernel_tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/riegeli/tensorflow/BUILD:
--------------------------------------------------------------------------------
1 | exports_files([
2 | "kernels/riegeli_dataset_ops.cc",
3 | "ops/riegeli_dataset_ops.cc",
4 | ])
5 |
--------------------------------------------------------------------------------
/python/riegeli/records/tests/records_test.proto:
--------------------------------------------------------------------------------
1 | syntax = "proto2";
2 |
3 | package riegeli.tests;
4 |
5 | message SimpleMessage {
6 | optional int32 id = 1;
7 | optional bytes payload = 2;
8 | }
9 |
--------------------------------------------------------------------------------
/riegeli/BUILD:
--------------------------------------------------------------------------------
1 | # Riegeli, file format for storing a sequence of records.
2 |
3 | package(
4 | default_visibility = ["//visibility:public"],
5 | features = ["header_modules"],
6 | )
7 |
8 | licenses(["notice"])
9 |
10 | exports_files(["LICENSE"])
11 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include .bazelrc
2 | include *.md
3 | include LICENSE
4 | include MANIFEST.in
5 | include WORKSPACE
6 | include configure
7 | recursive-include doc *
8 | recursive-include python *
9 | recursive-include riegeli *
10 | recursive-include third_party *
11 |
--------------------------------------------------------------------------------
/python/riegeli/records/examples/BUILD:
--------------------------------------------------------------------------------
1 | load("@rules_python//python:defs.bzl", "py_binary")
2 |
3 | package(features = ["header_modules"])
4 |
5 | licenses(["notice"])
6 |
7 | py_binary(
8 | name = "write_read_records",
9 | srcs = ["write_read_records.py"],
10 | deps = [
11 | "//python/riegeli",
12 | "//python/riegeli/records/tests:records_test_py_pb2",
13 | ],
14 | )
15 |
--------------------------------------------------------------------------------
/riegeli/records/README.md:
--------------------------------------------------------------------------------
1 | # Summary
2 |
3 | Riegeli/records is a file format for storing a sequence of records.
4 |
5 | The format supports sequential writing, appending to a previously created file,
6 | sequential reading, and seeking while reading. Data are optionally compressed,
7 | with special support for the case when records are proto messages. Data
8 | corruption is detected and reading can be resumed after skipping over a
9 | corrupted region.
10 |
--------------------------------------------------------------------------------
/doc/index.md:
--------------------------------------------------------------------------------
1 | # Riegeli
2 |
3 | *Riegeli/records* is a file format for storing a sequence of string records,
4 | typically serialized protocol buffers. It supports dense compression, fast
5 | decoding, seeking, detection and optional skipping of data corruption, filtering
6 | of proto message fields for even faster decoding, and parallel encoding.
7 |
8 | * [Specification of Riegeli/records file format](riegeli_records_file_format.md).
9 | * [Specifying options for writing Riegeli/records files](record_writer_options.md).
10 |
--------------------------------------------------------------------------------
/tf_dependency/BUILD.tpl:
--------------------------------------------------------------------------------
1 | package(
2 | default_visibility = ["//visibility:public"],
3 | features = ["header_modules"],
4 | )
5 |
6 | cc_library(
7 | name = "tf_header_lib",
8 | hdrs = [":tf_header_include"],
9 | includes = ["include"],
10 | visibility = ["//visibility:public"],
11 | )
12 |
13 | cc_library(
14 | name = "libtensorflow_framework",
15 | srcs = [":%{TF_SHARED_LIBRARY_NAME}"],
16 | visibility = ["//visibility:public"],
17 | )
18 |
19 | %{TF_HEADER_GENRULE}
20 | %{TF_SHARED_LIBRARY_GENRULE}
21 |
--------------------------------------------------------------------------------
/.bazelrc:
--------------------------------------------------------------------------------
1 | # Enable Bzlmod by default.
2 | common --enable_bzlmod
3 |
4 | # Use C++17.
5 | build --cxxopt=-std=c++17
6 | build --host_cxxopt=-std=c++17
7 |
8 | # Make Python protos faster by backing them with C++ protos.
9 | # TODO: Reenable once protobuf releases
10 | # https://github.com/protocolbuffers/protobuf/pull/22633
11 | # i.e. in version > 32.0. Or possibly switch to upb.
12 | # build --define=use_fast_cpp_protos=true
13 |
14 | # Options from ./configure
15 | # This is currently disabled because TensorFlow does not support bzlmod,
16 | # hence Riegeli/TensorFlow bindings are broken anyway.
17 | # import %workspace%/configure.bazelrc
18 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Riegeli
2 |
3 | *Riegeli/records* is a file format for storing a sequence of string records,
4 | typically serialized protocol buffers. It supports dense compression, fast
5 | decoding, seeking, detection and optional skipping of data corruption, filtering
6 | of proto message fields for even faster decoding, and parallel encoding.
7 |
8 | See [documentation](https://github.com/google/riegeli/blob/master/doc/index.md).
9 |
10 | # Status
11 |
12 | Riegeli file format will only change in a backward compatible way (i.e. future
13 | readers will understand current files, but current readers might not understand
14 | files using future features).
15 |
16 | Riegeli C++ API might change in incompatible ways.
17 |
--------------------------------------------------------------------------------
/python/README.md:
--------------------------------------------------------------------------------
1 | # Riegeli
2 |
3 | *Riegeli/records* is a file format for storing a sequence of string records,
4 | typically serialized protocol buffers. It supports dense compression, fast
5 | decoding, seeking, detection and optional skipping of data corruption, filtering
6 | of proto message fields for even faster decoding, and parallel encoding.
7 |
8 | See [documentation](https://github.com/google/riegeli/blob/master/doc/index.md).
9 |
10 | # Status
11 |
12 | Riegeli file format will only change in a backward compatible way (i.e. future
13 | readers will understand current files, but current readers might not understand
14 | files using future features).
15 |
16 | Riegeli C++ API might change in incompatible ways.
17 |
--------------------------------------------------------------------------------
/python/BUILD:
--------------------------------------------------------------------------------
1 | load("@rules_python//python:defs.bzl", "py_binary")
2 |
3 | package(default_visibility = ["//visibility:private"])
4 |
5 | licenses(["notice"])
6 |
7 | # These dependencies are gathered in a py_binary, instead of directly in
8 | # sh_binary data, so that bazel links __init__.py files to runfiles.
9 | py_binary(
10 | name = "dummy_binary",
11 | srcs = ["dummy_binary.py"],
12 | srcs_version = "PY3",
13 | deps = [
14 | "//python/riegeli",
15 | "//python/riegeli/tensorflow:riegeli_dataset_ops",
16 | ],
17 | )
18 |
19 | sh_binary(
20 | name = "build_pip_package",
21 | srcs = ["build_pip_package.sh"],
22 | data = [
23 | "MANIFEST.in",
24 | "README.md",
25 | "setup.py",
26 | ":dummy_binary",
27 | ],
28 | )
29 |
--------------------------------------------------------------------------------
/python/riegeli/BUILD:
--------------------------------------------------------------------------------
1 | # Riegeli, file format for storing a sequence of records.
2 |
3 | load("@rules_python//python:defs.bzl", "py_library")
4 |
5 | package(
6 | default_visibility = ["//visibility:public"],
7 | features = ["header_modules"],
8 | )
9 |
10 | licenses(["notice"])
11 |
12 | exports_files(["LICENSE"])
13 |
14 | py_library(
15 | name = "riegeli",
16 | srcs = ["__init__.py"],
17 | imports = [".."],
18 | deps = [
19 | "//python/riegeli/base:riegeli_error",
20 | "//python/riegeli/records:record_position",
21 | "//python/riegeli/records:record_reader",
22 | "//python/riegeli/records:record_writer",
23 | "//python/riegeli/records:records_metadata_py_pb2",
24 | "//python/riegeli/records:skipped_region",
25 | ],
26 | )
27 |
--------------------------------------------------------------------------------
/riegeli/chunk_encoding/README.md:
--------------------------------------------------------------------------------
1 | # Purpose
2 |
3 | Riegeli/transpose transforms protocol buffer byte streams into a custom data
4 | format that can be compressed around 20% more densely. The additional transform
5 | slows down both compression and decompression by around 50%. Often this is still
6 | a desirable trade-off, but in the end it depends on the compressed data and
7 | other system requirements such as latency vs. resource use.
8 |
9 | # Detailed design
10 |
11 | Transposition of a set of protocol buffers means that we associate a container
12 | with each tag. Then all the values corresponding to a specific tag are stored in
13 | the container associated with it. Invocation of a general purpose compression
14 | algorithm on the concatenation of these containers offers better compression
15 | ratios than its invocation on the concatenation of the original binary encoding
16 | of the protocol buffers.
17 |
--------------------------------------------------------------------------------
/python/riegeli/records/tests/BUILD:
--------------------------------------------------------------------------------
1 | load("@com_google_protobuf//bazel:proto_library.bzl", "proto_library")
2 | load("@com_google_protobuf//bazel:py_proto_library.bzl", "py_proto_library")
3 | load("@rules_python//python:defs.bzl", "py_test")
4 |
5 | package(
6 | default_visibility = [
7 | "//python/riegeli:__subpackages__",
8 | ],
9 | features = ["header_modules"],
10 | )
11 |
12 | licenses(["notice"])
13 |
14 | py_test(
15 | name = "records_test",
16 | srcs = ["records_test.py"],
17 | deps = [
18 | ":records_test_py_pb2",
19 | "//python/riegeli",
20 | "@absl_py//absl/logging",
21 | "@absl_py//absl/testing:absltest",
22 | "@absl_py//absl/testing:parameterized",
23 | "@com_google_protobuf//:protobuf_python",
24 | ],
25 | )
26 |
27 | proto_library(
28 | name = "records_test_proto",
29 | srcs = ["records_test.proto"],
30 | )
31 |
32 | py_proto_library(
33 | name = "records_test_py_pb2",
34 | deps = ["records_test_proto"],
35 | )
36 |
--------------------------------------------------------------------------------
/riegeli/bzip2/bzip2_error.h:
--------------------------------------------------------------------------------
1 | // Copyright 2023 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_BZIP2_BZIP2_ERROR_H_
16 | #define RIEGELI_BZIP2_BZIP2_ERROR_H_
17 |
18 | #include "absl/status/status.h"
19 | #include "absl/strings/string_view.h"
20 |
21 | namespace riegeli::bzip2_internal {
22 |
23 | absl::Status Bzip2ErrorToStatus(absl::string_view operation, int bzlib_code);
24 |
25 | } // namespace riegeli::bzip2_internal
26 |
27 | #endif // RIEGELI_BZIP2_BZIP2_ERROR_H_
28 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 |
3 | We'd love to accept your patches and contributions to this project. There are
4 | just a few small guidelines you need to follow.
5 |
6 | ## Contributor License Agreement
7 |
8 | Contributions to this project must be accompanied by a Contributor License
9 | Agreement. You (or your employer) retain the copyright to your contribution;
10 | this simply gives us permission to use and redistribute your contributions as
11 | part of the project. Head over to <https://cla.developers.google.com/> to see
12 | your current agreements on file or to sign a new one.
13 |
14 | You generally only need to submit a CLA once, so if you've already submitted one
15 | (even if it was for a different project), you probably don't need to do it
16 | again.
17 |
18 | ## Code reviews
19 |
20 | All submissions, including submissions by project members, require review. We
21 | use GitHub pull requests for this purpose. Consult
22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
23 | information on using pull requests.
24 |
--------------------------------------------------------------------------------
/riegeli/base/chain.h:
--------------------------------------------------------------------------------
1 | // Copyright 2024 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_BASE_CHAIN_H_
16 | #define RIEGELI_BASE_CHAIN_H_
17 |
18 | #include "riegeli/base/chain_base.h" // IWYU pragma: export
19 | #include "riegeli/base/chain_details.h" // IWYU pragma: export
20 | #include "riegeli/base/external_ref_base.h" // IWYU pragma: keep
21 | #include "riegeli/base/external_ref_support.h" // IWYU pragma: keep
22 |
23 | #endif // RIEGELI_BASE_CHAIN_H_
24 |
--------------------------------------------------------------------------------
/riegeli/base/external_ref.h:
--------------------------------------------------------------------------------
1 | // Copyright 2024 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_BASE_EXTERNAL_REF_H_
16 | #define RIEGELI_BASE_EXTERNAL_REF_H_
17 |
18 | #include "riegeli/base/chain_base.h" // IWYU pragma: keep
19 | #include "riegeli/base/chain_details.h" // IWYU pragma: keep
20 | #include "riegeli/base/external_ref_base.h" // IWYU pragma: export
21 | #include "riegeli/base/external_ref_support.h" // IWYU pragma: export
22 |
23 | #endif // RIEGELI_BASE_EXTERNAL_REF_H_
24 |
--------------------------------------------------------------------------------
/riegeli/records/skipped_region.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2018 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "riegeli/records/skipped_region.h"
16 |
17 | #include <ostream>
18 | #include <string>
19 |
20 | #include "absl/strings/str_cat.h"
21 |
22 | namespace riegeli {
23 |
24 | std::string SkippedRegion::ToString() const {
25 | return absl::StrCat("[", begin_, "..", end_, "): ", message_);
26 | }
27 |
28 | void SkippedRegion::Output(std::ostream& dest) const { dest << ToString(); }
29 |
30 | } // namespace riegeli
31 |
--------------------------------------------------------------------------------
/riegeli/xz/xz_error.h:
--------------------------------------------------------------------------------
1 | // Copyright 2023 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_XZ_XZ_ERROR_H_
16 | #define RIEGELI_XZ_XZ_ERROR_H_
17 |
18 | #include "absl/status/status.h"
19 | #include "absl/strings/string_view.h"
20 | #include "lzma.h"
21 |
22 | namespace riegeli::xz_internal {
23 |
24 | absl::Status XzErrorToStatus(absl::string_view operation,
25 | lzma_ret liblzma_code);
26 |
27 | } // namespace riegeli::xz_internal
28 |
29 | #endif // RIEGELI_XZ_XZ_ERROR_H_
30 |
--------------------------------------------------------------------------------
/riegeli/chunk_encoding/hash.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_CHUNK_ENCODING_HASH_H_
16 | #define RIEGELI_CHUNK_ENCODING_HASH_H_
17 |
18 | #include <stdint.h>
19 |
20 | #include "absl/strings/string_view.h"
21 | #include "riegeli/base/chain.h"
22 |
23 | namespace riegeli::chunk_encoding_internal {
24 |
25 | uint64_t Hash(absl::string_view data);
26 | uint64_t Hash(const Chain& data);
27 |
28 | } // namespace riegeli::chunk_encoding_internal
29 |
30 | #endif // RIEGELI_CHUNK_ENCODING_HASH_H_
31 |
--------------------------------------------------------------------------------
/riegeli/varint/BUILD:
--------------------------------------------------------------------------------
1 | load("@rules_cc//cc:defs.bzl", "cc_library")
2 |
3 | package(
4 | default_visibility = ["//visibility:public"],
5 | features = ["header_modules"],
6 | )
7 |
8 | licenses(["notice"])
9 |
10 | cc_library(
11 | name = "varint_reading",
12 | srcs = [
13 | "varint_internal.h",
14 | "varint_reading.cc",
15 | ],
16 | hdrs = ["varint_reading.h"],
17 | deps = [
18 | "//riegeli/base:arithmetic",
19 | "//riegeli/base:assert",
20 | "//riegeli/bytes:reader",
21 | "@com_google_absl//absl/base:core_headers",
22 | "@com_google_absl//absl/strings:cord",
23 | "@com_google_absl//absl/strings:string_view",
24 | ],
25 | )
26 |
27 | cc_library(
28 | name = "varint_writing",
29 | srcs = ["varint_internal.h"],
30 | hdrs = ["varint_writing.h"],
31 | deps = [
32 | "//riegeli/base:arithmetic",
33 | "//riegeli/base:constexpr",
34 | "//riegeli/bytes:backward_writer",
35 | "//riegeli/bytes:writer",
36 | "@com_google_absl//absl/base:core_headers",
37 | "@com_google_absl//absl/numeric:bits",
38 | ],
39 | )
40 |
--------------------------------------------------------------------------------
/riegeli/varint/varint_internal.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_VARINT_VARINT_INTERNAL_H_
16 | #define RIEGELI_VARINT_VARINT_INTERNAL_H_
17 |
18 | // IWYU pragma: private, include "riegeli/varint/varint_reading.h"
19 | // IWYU pragma: private, include "riegeli/varint/varint_writing.h"
20 |
21 | #include <stddef.h>
22 |
23 | namespace riegeli {
24 |
25 | inline constexpr size_t kMaxLengthVarint32 = 5;
26 | inline constexpr size_t kMaxLengthVarint64 = 10;
27 |
28 | } // namespace riegeli
29 |
30 | #endif // RIEGELI_VARINT_VARINT_INTERNAL_H_
31 |
--------------------------------------------------------------------------------
/riegeli/ordered_varint/BUILD:
--------------------------------------------------------------------------------
1 | load("@rules_cc//cc:defs.bzl", "cc_library")
2 |
3 | package(
4 | default_visibility = ["//visibility:public"],
5 | features = ["header_modules"],
6 | )
7 |
8 | licenses(["notice"])
9 |
10 | cc_library(
11 | name = "ordered_varint_reading",
12 | srcs = [
13 | "ordered_varint_internal.h",
14 | "ordered_varint_reading.cc",
15 | ],
16 | hdrs = ["ordered_varint_reading.h"],
17 | deps = [
18 | "//riegeli/base:assert",
19 | "//riegeli/bytes:reader",
20 | "//riegeli/endian:endian_reading",
21 | "@com_google_absl//absl/base:core_headers",
22 | ],
23 | )
24 |
25 | cc_library(
26 | name = "ordered_varint_writing",
27 | srcs = [
28 | "ordered_varint_internal.h",
29 | "ordered_varint_writing.cc",
30 | ],
31 | hdrs = ["ordered_varint_writing.h"],
32 | deps = [
33 | "//riegeli/base:arithmetic",
34 | "//riegeli/base:assert",
35 | "//riegeli/bytes:writer",
36 | "//riegeli/endian:endian_writing",
37 | "@com_google_absl//absl/base:core_headers",
38 | "@com_google_absl//absl/numeric:bits",
39 | ],
40 | )
41 |
--------------------------------------------------------------------------------
/riegeli/bytes/cfile_internal.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2023 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "riegeli/bytes/cfile_internal.h"
16 |
17 | #include <stdio.h>
18 |
19 | #include <string>
20 |
21 | #include "absl/base/optimization.h"
22 | #include "riegeli/bytes/fd_internal.h"
23 |
24 | namespace riegeli::cfile_internal {
25 |
26 | // Returns what `fd_internal::FilenameForFd()` reports for the descriptor
27 | // underlying `file`, or "" when `fileno()` yields a negative value.
28 | std::string FilenameForCFile(FILE* file) {
29 |   const int descriptor = fileno(file);
30 |   // A negative descriptor cannot be resolved, so skip the fd helper.
31 |   if (ABSL_PREDICT_FALSE(descriptor < 0)) return "";
32 |   return fd_internal::FilenameForFd(descriptor);
33 | }
34 |
35 | } // namespace riegeli::cfile_internal
36 |
--------------------------------------------------------------------------------
/riegeli/base/string_utils.h:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_BASE_STRING_UTILS_H_
16 | #define RIEGELI_BASE_STRING_UTILS_H_
17 |
18 | #include <stddef.h>
19 |
20 | #include <string>
21 |
22 | #include "absl/base/nullability.h"
23 |
24 | ABSL_POINTERS_DEFAULT_NONNULL
25 |
26 | namespace riegeli {
27 |
28 | // Resizes `dest` to `new_size`, ensuring that repeated growth has the cost
29 | // proportional to the final size. New contents are unspecified.
30 | void ResizeStringAmortized(std::string& dest, size_t new_size);
31 |
32 | } // namespace riegeli
33 |
34 | #endif // RIEGELI_BASE_STRING_UTILS_H_
35 |
--------------------------------------------------------------------------------
/riegeli/zlib/zlib_error.h:
--------------------------------------------------------------------------------
1 | // Copyright 2023 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_ZLIB_ZLIB_ERROR_H_
16 | #define RIEGELI_ZLIB_ZLIB_ERROR_H_
17 |
18 | #include "absl/base/attributes.h"
19 | #include "absl/status/status.h"
20 | #include "absl/strings/string_view.h"
21 |
22 | namespace riegeli::zlib_internal {
23 | // Builds an `absl::Status` from a zlib error. NOTE(review): presumably `operation` names the failed zlib call, `zlib_code` is its return code and `details` is zlib's message text — confirm against zlib_error.cc.
24 | ABSL_ATTRIBUTE_COLD absl::Status ZlibErrorToStatus(absl::string_view operation,
25 | int zlib_code,
26 | const char* details);
27 |
28 | } // namespace riegeli::zlib_internal
29 |
30 | #endif // RIEGELI_ZLIB_ZLIB_ERROR_H_
31 |
--------------------------------------------------------------------------------
/python/riegeli/base/BUILD:
--------------------------------------------------------------------------------
1 | load("@rules_cc//cc:defs.bzl", "cc_library")
2 | load("@rules_python//python:defs.bzl", "py_library")
3 |
4 | package(
5 | default_visibility = ["//python/riegeli:__subpackages__"],
6 | features = ["header_modules"],
7 | )
8 |
9 | licenses(["notice"])
10 |
11 | cc_library(
12 | name = "utils",
13 | srcs = ["utils.cc"],
14 | hdrs = ["utils.h"],
15 | data = [":riegeli_error"], # Python module imported from C++.
16 | # utils.cc has #define before #include to influence what the included
17 | # files provide.
18 | features = ["-use_header_modules"],
19 | deps = [
20 | "//riegeli/base:arithmetic",
21 | "//riegeli/base:assert",
22 | "//riegeli/base:chain",
23 | "//riegeli/base:compare",
24 | "//riegeli/base:types",
25 | "@com_google_absl//absl/base:core_headers",
26 | "@com_google_absl//absl/status",
27 | "@com_google_absl//absl/strings",
28 | "@com_google_absl//absl/strings:string_view",
29 | "@com_google_absl//absl/types:span",
30 | "@rules_python//python/cc:current_py_cc_headers",
31 | ],
32 | )
33 |
34 | py_library(
35 | name = "riegeli_error",
36 | srcs = ["riegeli_error.py"],
37 | )
38 |
--------------------------------------------------------------------------------
/riegeli/brotli/brotli_allocator.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2020 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "riegeli/brotli/brotli_allocator.h"
16 |
17 | #include
18 |
19 | namespace riegeli {
20 |
21 | namespace brotli_internal {
22 | // Allocation callbacks taking a `void* opaque` context; `opaque` must point to the `BrotliAllocator::Interface` to delegate to.
23 | void* RiegeliBrotliAllocFunc(void* opaque, size_t size) {
24 |   return static_cast<BrotliAllocator::Interface*>(opaque)->Alloc(size);
25 | }
26 |
27 | void RiegeliBrotliFreeFunc(void* opaque, void* ptr) {
28 |   static_cast<BrotliAllocator::Interface*>(opaque)->Free(ptr);
29 | }
30 |
31 | } // namespace brotli_internal
32 |
33 | BrotliAllocator::Interface::~Interface() = default;  // Defined out of line, in this .cc file.
34 |
35 | } // namespace riegeli
36 |
--------------------------------------------------------------------------------
/riegeli/base/external_data.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2024 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "riegeli/base/external_data.h"
16 |
17 | #include
18 |
19 | #include "absl/strings/string_view.h"
20 |
21 | namespace riegeli {
22 | // Returns an `ExternalData` viewing a freshly allocated copy of `data`; for empty `data` nothing is allocated and the storage pointer is null.
23 | ExternalData ExternalDataCopy(absl::string_view data) {
24 |   char* storage = nullptr;  // Stays null when there is nothing to copy.
25 |   if (!data.empty()) {
26 |     storage = static_cast<char*>(operator new(data.size()));
27 |     std::memcpy(storage, data.data(), data.size());
28 |   }
29 |   return ExternalData{ExternalStorage(storage, operator delete),  // Released with the matching `operator delete`.
30 |                       absl::string_view(storage, data.size())};
31 | }
32 |
33 | } // namespace riegeli
34 |
--------------------------------------------------------------------------------
/riegeli/digests/sha256_digester.h:
--------------------------------------------------------------------------------
1 | // Copyright 2023 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_DIGESTS_SHA256_DIGESTER_H_
16 | #define RIEGELI_DIGESTS_SHA256_DIGESTER_H_
17 |
18 | #include "openssl/base.h"
19 | #include "openssl/sha.h"
20 | #include "riegeli/digests/openssl_digester.h"
21 |
22 | namespace riegeli {
23 |
24 | // A digester computing SHA-256 checksums, for `DigestingReader` and
25 | // `DigestingWriter`.
26 | using Sha256Digester = OpenSslDigester;
28 |
29 | } // namespace riegeli
30 |
31 | #endif // RIEGELI_DIGESTS_SHA256_DIGESTER_H_
32 |
--------------------------------------------------------------------------------
/riegeli/digests/sha512_digester.h:
--------------------------------------------------------------------------------
1 | // Copyright 2023 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_DIGESTS_SHA512_DIGESTER_H_
16 | #define RIEGELI_DIGESTS_SHA512_DIGESTER_H_
17 |
18 | #include "openssl/base.h"
19 | #include "openssl/sha.h"
20 | #include "riegeli/digests/openssl_digester.h"
21 |
22 | namespace riegeli {
23 |
24 | // A digester computing SHA-512 checksums, for `DigestingReader` and
25 | // `DigestingWriter`.
26 | using Sha512Digester = OpenSslDigester;
28 |
29 | } // namespace riegeli
30 |
31 | #endif // RIEGELI_DIGESTS_SHA512_DIGESTER_H_
32 |
--------------------------------------------------------------------------------
/riegeli/base/port.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_BASE_PORT_H_
16 | #define RIEGELI_BASE_PORT_H_
17 |
18 | #include "absl/base/nullability.h"
19 |
20 | ABSL_POINTERS_DEFAULT_NONNULL
21 |
22 | // Clang has `__has_builtin()`. Other compilers need other means to detect
23 | // availability of builtins.
24 | #ifdef __has_builtin
25 | #define RIEGELI_INTERNAL_HAS_BUILTIN(x) __has_builtin(x)
26 | #else
27 | #define RIEGELI_INTERNAL_HAS_BUILTIN(x) 0
28 | #endif
29 | // True if `__GNUC__`.`__GNUC_MINOR__` is at least `major`.`minor`. Meant for `#if`, where an undefined `__GNUC__` evaluates as 0.
30 | #define RIEGELI_INTERNAL_IS_GCC_VERSION(major, minor) \
31 | (__GNUC__ > (major) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
32 |
33 | #endif // RIEGELI_BASE_PORT_H_
34 |
--------------------------------------------------------------------------------
/riegeli/ordered_varint/ordered_varint_internal.h:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_ORDERED_VARINT_ORDERED_VARINT_INTERNAL_H_
16 | #define RIEGELI_ORDERED_VARINT_ORDERED_VARINT_INTERNAL_H_
17 |
18 | // IWYU pragma: private, include "riegeli/ordered_varint/ordered_varint_reading.h"
19 | // IWYU pragma: private, include "riegeli/ordered_varint/ordered_varint_writing.h"
20 |
21 | #include
22 |
23 | namespace riegeli {
24 |
25 | inline constexpr size_t kMaxLengthOrderedVarint32 = 5;
26 | inline constexpr size_t kMaxLengthOrderedVarint64 = 9;
27 |
28 | } // namespace riegeli
29 |
30 | #endif // RIEGELI_ORDERED_VARINT_ORDERED_VARINT_INTERNAL_H_
31 |
--------------------------------------------------------------------------------
/riegeli/base/constexpr.h:
--------------------------------------------------------------------------------
1 | // Copyright 2018 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_BASE_CONSTEXPR_H_
16 | #define RIEGELI_BASE_CONSTEXPR_H_
17 |
18 | #include "absl/base/nullability.h"
19 | #include "riegeli/base/port.h"
20 |
21 | ABSL_POINTERS_DEFAULT_NONNULL
22 |
23 | namespace riegeli {
24 |
25 | // Returns `true` if the value of the expression is known at compile time.
26 | #if RIEGELI_INTERNAL_HAS_BUILTIN(__builtin_constant_p) || \
27 | RIEGELI_INTERNAL_IS_GCC_VERSION(3, 1)
28 | #define RIEGELI_IS_CONSTANT(expr) __builtin_constant_p(expr)
29 | #else
30 | #define RIEGELI_IS_CONSTANT(expr) false
31 | #endif
32 |
33 | } // namespace riegeli
34 |
35 | #endif // RIEGELI_BASE_CONSTEXPR_H_
36 |
--------------------------------------------------------------------------------
/riegeli/digests/sha512_256_digester.h:
--------------------------------------------------------------------------------
1 | // Copyright 2023 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_DIGESTS_SHA512_256_DIGESTER_H_
16 | #define RIEGELI_DIGESTS_SHA512_256_DIGESTER_H_
17 |
18 | #include "openssl/base.h"
19 | #include "openssl/sha.h"
20 | #include "riegeli/digests/openssl_digester.h"
21 |
22 | namespace riegeli {
23 |
24 | // A digester computing SHA-512/256 checksums, for `DigestingReader` and
25 | // `DigestingWriter`.
26 | using Sha512_256Digester =
27 | OpenSslDigester;
29 |
30 | } // namespace riegeli
31 |
32 | #endif // RIEGELI_DIGESTS_SHA512_256_DIGESTER_H_
33 |
--------------------------------------------------------------------------------
/riegeli/base/buffer.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2023 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "riegeli/base/buffer.h"
16 |
17 | #include
18 |
19 | #include "absl/strings/string_view.h"
20 | #include "riegeli/base/arithmetic.h"
21 |
22 | namespace riegeli {
23 | // Writes "[buffer] { ... }" to `dest`; for a non-empty `substr` also the distance from `data()` to `substr` (if nonzero) and from the end of `substr` to `data() + capacity()`.
24 | void Buffer::DumpStructure(absl::string_view substr, std::ostream& dest) const {
25 |   dest << "[buffer] {";
26 |   if (!substr.empty()) {
27 |     const char* const substr_end = substr.data() + substr.size();
28 |     if (substr.data() != data()) {
29 |       dest << " space_before: " << PtrDistance(data(), substr.data());
30 |     }
31 |     dest << " space_after: " << PtrDistance(substr_end, data() + capacity());
32 |   }
33 |   dest << " }";
34 | }
35 |
36 | } // namespace riegeli
37 |
--------------------------------------------------------------------------------
/riegeli/bytes/cfile_internal.h:
--------------------------------------------------------------------------------
1 | // Copyright 2024 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_BYTES_CFILE_INTERNAL_H_
16 | #define RIEGELI_BYTES_CFILE_INTERNAL_H_
17 |
18 | #include
19 |
20 | #include
21 |
22 | namespace riegeli::cfile_internal {
23 |
24 | // Infers a filename from the fd corresponding to the `FILE` by reading the
25 | // symlink target for `absl::StrCat("/proc/self/fd/", fd)` (on Windows returns
26 | // a `absl::StrCat("<fd ", fd, ">")` placeholder instead), or returns
27 | // "<unknown>" if there is no corresponding fd.
28 | std::string FilenameForCFile(FILE* file);
29 |
30 | } // namespace riegeli::cfile_internal
31 |
32 | #endif // RIEGELI_BYTES_CFILE_INTERNAL_H_
33 |
--------------------------------------------------------------------------------
/riegeli/base/errno_mapping.h:
--------------------------------------------------------------------------------
1 | // Copyright 2018 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_BASE_ERRNO_MAPPING_H_
16 | #define RIEGELI_BASE_ERRNO_MAPPING_H_
17 |
18 | #ifdef _WIN32
19 | #include
20 | #endif
21 |
22 | #include "absl/status/status.h"
23 | #ifdef _WIN32
24 | #include "absl/strings/string_view.h"
25 | #endif
26 |
27 | namespace riegeli {
28 |
29 | // Converts `absl::StatusCode` to `errno` value.
30 | int StatusCodeToErrno(absl::StatusCode status_code);
31 |
32 | #ifdef _WIN32
33 | absl::Status WindowsErrorToStatus(uint32_t error_number,
34 | absl::string_view message);
35 | #endif // _WIN32
36 |
37 | } // namespace riegeli
38 |
39 | #endif // RIEGELI_BASE_ERRNO_MAPPING_H_
40 |
--------------------------------------------------------------------------------
/riegeli/endian/BUILD:
--------------------------------------------------------------------------------
1 | load("@rules_cc//cc:defs.bzl", "cc_library")
2 |
3 | package(
4 | default_visibility = ["//visibility:public"],
5 | features = ["header_modules"],
6 | )
7 |
8 | licenses(["notice"])
9 |
10 | cc_library(
11 | name = "endian_reading",
12 | hdrs = ["endian_reading.h"],
13 | deps = [
14 | "//riegeli/base:null_safe_memcpy",
15 | "//riegeli/base:type_traits",
16 | "//riegeli/bytes:reader",
17 | "@com_google_absl//absl/base",
18 | "@com_google_absl//absl/base:config",
19 | "@com_google_absl//absl/base:core_headers",
20 | "@com_google_absl//absl/numeric:int128",
21 | "@com_google_absl//absl/types:span",
22 | ],
23 | )
24 |
25 | cc_library(
26 | name = "endian_writing",
27 | hdrs = ["endian_writing.h"],
28 | deps = [
29 | "//riegeli/base:null_safe_memcpy",
30 | "//riegeli/base:type_traits",
31 | "//riegeli/bytes:backward_writer",
32 | "//riegeli/bytes:writer",
33 | "@com_google_absl//absl/base",
34 | "@com_google_absl//absl/base:config",
35 | "@com_google_absl//absl/base:core_headers",
36 | "@com_google_absl//absl/numeric:int128",
37 | "@com_google_absl//absl/strings:string_view",
38 | "@com_google_absl//absl/types:span",
39 | ],
40 | )
41 |
--------------------------------------------------------------------------------
/riegeli/messages/map_entry_field.h:
--------------------------------------------------------------------------------
1 | // Copyright 2025 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_MESSAGES_MAP_ENTRY_FIELD_H_
16 | #define RIEGELI_MESSAGES_MAP_ENTRY_FIELD_H_
17 |
18 | // IWYU pragma: private, include "riegeli/messages/field_handlers.h"
19 | // IWYU pragma: private, include "riegeli/messages/serialized_message_backward_writer.h"
20 | // IWYU pragma: private, include "riegeli/messages/serialized_message_writer.h"
21 |
22 | #include "absl/base/nullability.h"
23 |
24 | ABSL_POINTERS_DEFAULT_NONNULL
25 |
26 | namespace riegeli {
27 |
28 | // Specifies a field of a synthetic map entry message.
29 | enum MapEntryField { kMapEntryKey = 1, kMapEntryValue = 2 };
30 |
31 | } // namespace riegeli
32 |
33 | #endif // RIEGELI_MESSAGES_MAP_ENTRY_FIELD_H_
34 |
--------------------------------------------------------------------------------
/riegeli/base/string_utils.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "riegeli/base/string_utils.h"
16 |
17 | #include
18 |
19 | #include
20 |
21 | #include "absl/base/nullability.h"
22 | #include "riegeli/base/arithmetic.h"
23 |
24 | ABSL_POINTERS_DEFAULT_NONNULL
25 |
26 | namespace riegeli {
27 | // Resizes `dest` to `new_size`. When more capacity is needed, reserves at least 1.5x the old capacity (capped at `max_size()`), or exactly `new_size` if `dest` still has the capacity of a default-constructed string.
28 | void ResizeStringAmortized(std::string& dest, size_t new_size) {
29 |   const size_t old_capacity = dest.capacity();
30 |   if (new_size > old_capacity) {
31 |     const size_t grown =
32 |         UnsignedMin(old_capacity + old_capacity / 2, dest.max_size());
33 |     dest.reserve(old_capacity == std::string().capacity()
34 |                      ? new_size
35 |                      : UnsignedMax(new_size, grown));
36 |   }
37 |   dest.resize(new_size);
38 | }
39 |
40 | } // namespace riegeli
41 |
--------------------------------------------------------------------------------
/riegeli/digests/md5_digester.h:
--------------------------------------------------------------------------------
1 | // Copyright 2023 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_DIGESTS_MD5_DIGESTER_H_
16 | #define RIEGELI_DIGESTS_MD5_DIGESTER_H_
17 |
18 | #include "openssl/base.h"
19 | #include "openssl/md5.h"
20 | #include "riegeli/digests/openssl_digester.h"
21 |
22 | namespace riegeli {
23 |
24 | // A digester computing MD5 checksums, for `DigestingReader` and
25 | // `DigestingWriter`.
26 | //
27 | // Warning: MD5 as a cryptographic hash function is broken.
28 | // Use this only if a preexisting format has already decided to use MD5.
29 | // Please contact ise-team@ in case of doubt.
30 | using Md5Digester = OpenSslDigester;
32 |
33 | } // namespace riegeli
34 |
35 | #endif // RIEGELI_DIGESTS_MD5_DIGESTER_H_
36 |
--------------------------------------------------------------------------------
/riegeli/digests/sha1_digester.h:
--------------------------------------------------------------------------------
1 | // Copyright 2023 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_DIGESTS_SHA1_DIGESTER_H_
16 | #define RIEGELI_DIGESTS_SHA1_DIGESTER_H_
17 |
18 | #include "openssl/base.h"
19 | #include "openssl/sha.h"
20 | #include "riegeli/digests/openssl_digester.h"
21 |
22 | namespace riegeli {
23 |
24 | // A digester computing SHA-1 checksums, for `DigestingReader` and
25 | // `DigestingWriter`.
26 | //
27 | // Warning: SHA-1 as a cryptographic hash function is broken.
28 | // Use this only if a preexisting format has already decided to use SHA-1.
29 | // Please contact ise-team@ in case of doubt.
30 | using Sha1Digester = OpenSslDigester;
32 |
33 | } // namespace riegeli
34 |
35 | #endif // RIEGELI_DIGESTS_SHA1_DIGESTER_H_
36 |
--------------------------------------------------------------------------------
/riegeli/chunk_encoding/constants.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_CHUNK_ENCODING_CONSTANTS_H_
16 | #define RIEGELI_CHUNK_ENCODING_CONSTANTS_H_
17 |
18 | #include
19 |
20 | #include
21 |
22 | namespace riegeli {
23 |
24 | // These values are frozen in the file format.
25 | enum class ChunkType : uint8_t {
26 | kFileSignature = 's',
27 | kFileMetadata = 'm',
28 | kPadding = 'p',
29 | kSimple = 'r',
30 | kTransposed = 't',
31 | };
32 |
33 | // These values are frozen in the file format.
34 | enum class CompressionType : uint8_t {
35 | kNone = 0,
36 | kBrotli = 'b',
37 | kZstd = 'z',
38 | kSnappy = 's',
39 | };
40 | // Largest supported record count: the maximum `uint64_t` shifted right by 8 bits, i.e. 2^56 - 1.
41 | inline constexpr uint64_t kMaxNumRecords =
42 |     std::numeric_limits<uint64_t>::max() >> 8;
43 |
44 | } // namespace riegeli
45 |
46 | #endif // RIEGELI_CHUNK_ENCODING_CONSTANTS_H_
47 |
--------------------------------------------------------------------------------
/riegeli/digests/adler32_digester.h:
--------------------------------------------------------------------------------
1 | // Copyright 2023 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_DIGESTS_ADLER32_DIGESTER_H_
16 | #define RIEGELI_DIGESTS_ADLER32_DIGESTER_H_
17 |
18 | #include
19 |
20 | #include "absl/strings/string_view.h"
21 |
22 | namespace riegeli {
23 |
24 | // A digester computing Adler32 checksums, for `DigestingReader` and
25 | // `DigestingWriter`.
26 | class Adler32Digester {
27 | public:
28 | Adler32Digester() : Adler32Digester(1) {}  // Delegates with a seed of 1.
29 | // Starts with `seed` as the initial checksum state.
30 | explicit Adler32Digester(uint32_t seed);
31 |
32 | Adler32Digester(const Adler32Digester& that) = default;
33 | Adler32Digester& operator=(const Adler32Digester& that) = default;
34 | // `Write()` feeds `src` into the checksum; `Digest()` returns the current state without modifying it.
35 | void Write(absl::string_view src);
36 | uint32_t Digest() { return adler_; }
37 |
38 | private:
39 | uint32_t adler_;  // Current checksum state.
40 | };
41 |
42 | } // namespace riegeli
43 |
44 | #endif // RIEGELI_DIGESTS_ADLER32_DIGESTER_H_
45 |
--------------------------------------------------------------------------------
/riegeli/base/shared_buffer.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2023 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "riegeli/base/shared_buffer.h"
16 |
17 | #include
18 |
19 | #include
20 |
21 | #include "absl/strings/string_view.h"
22 | #include "riegeli/base/arithmetic.h"
23 |
24 | namespace riegeli {
25 | // Writes "[shared_buffer] { ... }" to `dest`: the ref count if it is not 1, and for a non-empty `substr` the distance from `data()` to `substr` (if nonzero) and from the end of `substr` to `data() + capacity()`.
26 | void SharedBuffer::DumpStructure(absl::string_view substr,
27 |                                  std::ostream& dest) const {
28 |   dest << "[shared_buffer] {";
29 |   const size_t num_refs = GetRefCount();
30 |   if (num_refs != 1) dest << " ref_count: " << num_refs;
31 |   if (!substr.empty()) {
32 |     const char* const substr_end = substr.data() + substr.size();
33 |     if (substr.data() != data()) {
34 |       dest << " space_before: " << PtrDistance(data(), substr.data());
35 |     }
36 |     dest << " space_after: " << PtrDistance(substr_end, data() + capacity());
37 |   }
38 |   dest << " }";
39 | }
40 |
41 | } // namespace riegeli
42 |
--------------------------------------------------------------------------------
/riegeli/digests/adler32_digester.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2023 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "riegeli/digests/adler32_digester.h"
16 |
17 | #include
18 |
19 | #include "absl/base/optimization.h"
20 | #include "absl/strings/string_view.h"
21 | #include "riegeli/base/arithmetic.h"
22 | #include "zconf.h"
23 | #include "zlib.h"
24 |
25 | namespace riegeli {
26 |
27 | Adler32Digester::Adler32Digester(uint32_t seed) : adler_(seed) {
28 | // Call with no data, result discarded: this checks CPU features.
29 | adler32_z(0, nullptr, 0);
30 | }
31 |
32 | void Adler32Digester::Write(absl::string_view src) {  // Folds `src` into `adler_`.
33 |   if (ABSL_PREDICT_FALSE(src.empty())) {
34 |     // `adler32_z(state, nullptr, 0)` exceptionally returns 1, not `state`.
35 |     return;
36 |   }
37 |   adler_ = IntCast<uint32_t>(adler32_z(  // Casts convert to and from zlib's parameter types.
38 |       IntCast<uLong>(adler_), reinterpret_cast<const Bytef*>(src.data()),
39 |       IntCast<z_size_t>(src.size())));
40 | }
41 |
42 | } // namespace riegeli
43 |
--------------------------------------------------------------------------------
/python/riegeli/BUILD.tpl:
--------------------------------------------------------------------------------
1 | load("@bazel_tools//tools/python:toolchain.bzl", "py_runtime_pair")
2 |
3 | licenses(["restricted"])
4 |
5 | package(
6 | default_visibility = ["//visibility:public"],
7 | features = ["header_modules"],
8 | )
9 |
10 | toolchain(
11 | name = "toolchain",
12 | toolchain = ":py_runtime_pair",  # NOTE(review): presumably defined by the %{PYTHON_RUNTIME_PAIR} substitution - confirm.
13 | toolchain_type = "@bazel_tools//tools/python:toolchain_type",
14 | )
15 |
16 | # To build a Python C/C++ extension on Windows, we need to link against the Python import library pythonXY.lib.
17 | # See https://docs.python.org/3/extending/windows.html
18 | cc_import(
19 | name = "python_lib",
20 | interface_library = select({
21 | ":windows": ":python_import_lib",
22 | # A placeholder for Unix platforms which makes --no_build happy.
23 | "//conditions:default": "not-existing.lib",
24 | }),
25 | system_provided = 1,  # Only the interface (import) library is supplied here.
26 | )
27 |
28 | cc_library(
29 | name = "python_headers",
30 | hdrs = [":python_include"],
31 | deps = select({
32 | ":windows": [":python_lib"],  # Only Windows links against the import library.
33 | "//conditions:default": [],
34 | }),
35 | includes = ["python_include"],
36 | )
37 |
38 | cc_library(
39 | name = "numpy_headers",
40 | hdrs = [":numpy_include"],
41 | includes = ["numpy_include"],
42 | )
43 |
44 | config_setting(
45 | name = "windows",
46 | values = {"cpu": "x64_windows"},  # Matches builds with --cpu=x64_windows.
47 | visibility = ["//visibility:public"],
48 | )
49 |
50 | %{PYTHON_RUNTIME_PAIR}
51 | %{PYTHON_INCLUDE_GENRULE}
52 | %{NUMPY_INCLUDE_GENRULE}
53 | %{PYTHON_IMPORT_LIB_GENRULE}
54 |
--------------------------------------------------------------------------------
/riegeli/digests/crc32_digester.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2023 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "riegeli/digests/crc32_digester.h"
16 |
17 | #include
18 |
19 | #include "absl/base/optimization.h"
20 | #include "absl/strings/string_view.h"
21 | #include "riegeli/base/arithmetic.h"
22 | #include "zconf.h"
23 | #include "zlib.h"
24 |
25 | namespace riegeli {
26 |
27 | Crc32Digester::Crc32Digester(uint32_t seed) : crc_(seed) {  // Starts from `seed`.
28 | // The result is discarded; the call is made only because it checks CPU features.
29 | crc32_z(0, nullptr, 0);
30 | }
31 |
32 | void Crc32Digester::Write(absl::string_view src) {
33 | if (ABSL_PREDICT_FALSE(src.empty())) {
34 | // No-op: `crc32_z(state, nullptr, 0)` exceptionally returns 0, not `state`.
35 | return;
36 | }
37 | crc_ = IntCast(crc32_z(IntCast(crc_),  // Folds `src` into the running checksum `crc_`.
38 | reinterpret_cast(src.data()),
39 | IntCast(src.size())));
40 | }
41 |
42 | } // namespace riegeli
43 |
--------------------------------------------------------------------------------
/riegeli/base/status.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2019 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "riegeli/base/status.h"
16 |
17 | #include "absl/status/status.h"
18 | #include "absl/strings/cord.h"
19 | #include "absl/strings/str_cat.h"
20 | #include "absl/strings/string_view.h"
21 |
22 | namespace riegeli {
23 |
24 | absl::Status SetMessage(const absl::Status& status, absl::string_view message) {
25 |   absl::Status updated(status.code(), message);  // Same code, new message.
26 |   status.ForEachPayload(
27 |       [&updated](absl::string_view url, const absl::Cord& contents) {
28 |         updated.SetPayload(url, contents);  // Carry every payload over.
29 |       });
30 |   return updated;
31 | }
32 |
33 | absl::Status Annotate(const absl::Status& status, absl::string_view detail) {
34 |   // An OK status or an empty detail leaves the status unchanged.
35 |   if (status.ok() || detail.empty()) return status;
36 |   if (status.message().empty()) return SetMessage(status, detail);
37 |   return SetMessage(status, absl::StrCat(status.message(), "; ", detail));
38 | }
39 |
40 | }  // namespace riegeli
41 |
--------------------------------------------------------------------------------
/riegeli/base/unicode.h:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_BASE_UNICODE_H_
16 | #define RIEGELI_BASE_UNICODE_H_
17 |
18 | #ifdef _WIN32
19 |
20 | #include
21 |
22 | #include "absl/base/nullability.h"
23 | #include "absl/strings/string_view.h"
24 | #include "absl/types/span.h"
25 |
26 | ABSL_POINTERS_DEFAULT_NONNULL
27 |
28 | namespace riegeli {
29 |
30 | // Converts from UTF-8 string to `wchar_t[]`.
31 | //
32 | // Returns `false` on failure.
33 | bool Utf8ToWide(absl::string_view src, std::wstring& dest);
34 |
35 | // Converts from `wchar_t[]` to UTF-8 string.
36 | //
37 | // Returns `false` on failure.
38 | bool WideToUtf8(absl::Span src, std::string& dest);
39 |
40 | // Converts from `wchar_t[]` to UTF-8 string.
41 | //
42 | // Emits replacement characters on failure.
43 | std::string WideToUtf8Lossy(absl::Span src);
44 |
45 | } // namespace riegeli
46 |
47 | #endif // _WIN32
48 |
49 | #endif // RIEGELI_BASE_UNICODE_H_
50 |
--------------------------------------------------------------------------------
/riegeli/records/records_metadata.proto:
--------------------------------------------------------------------------------
1 | syntax = "proto2";
2 |
3 | package riegeli;
4 |
5 | import "google/protobuf/descriptor.proto";
6 |
7 | // Information about a Riegeli/records file, which may be helpful to interpret
8 | // file contents.
9 | message RecordsMetadata {
10 | // Human-readable explanation of what the file contains.
11 | optional string file_comment = 1;
12 |
13 | // If records are proto messages of a fixed type, the full name of their type.
14 | optional string record_type_name = 2;
15 |
16 | // If `record_type_name` is set, proto file descriptors which should contain
17 | // the definition of that type and their dependencies (each file comes after
18 | // all its dependencies).
19 | //
20 | // If `file_descriptor` is empty but `record_type_name` is set (not
21 | // recommended), `record_type_name` can be interpreted in the context of an
22 | // unspecified proto descriptor database.
23 | repeated google.protobuf.FileDescriptorProto file_descriptor = 3;
24 |
25 | // Options originally used to encode the file:
26 | // https://github.com/google/riegeli/blob/master/doc/record_writer_options.md
27 | //
28 | // They are informative only; they are never necessary to decode the file.
29 | optional string record_writer_options = 4;
30 |
31 | // Number of records in the file, so that the reader can tune for it.
32 | //
33 | // This is informative only; the actual number of records may differ.
34 | optional int64 num_records = 5;
35 |
36 | // Clients can define custom metadata in extensions of this message.
37 | extensions 1000 to max;
38 | }
39 |
--------------------------------------------------------------------------------
/python/riegeli/records/records_metadata.proto:
--------------------------------------------------------------------------------
1 | syntax = "proto2";
2 |
3 | package riegeli;
4 |
5 | import "google/protobuf/descriptor.proto";
6 |
7 | // Information about a Riegeli/records file, which may be helpful to interpret
8 | // file contents.
9 | message RecordsMetadata {
10 | // Human-readable explanation of what the file contains.
11 | optional string file_comment = 1;
12 |
13 | // If records are proto messages of a fixed type, the full name of their type.
14 | optional string record_type_name = 2;
15 |
16 | // If `record_type_name` is set, proto file descriptors which should contain
17 | // the definition of that type and their dependencies (each file comes after
18 | // all its dependencies).
19 | //
20 | // If `file_descriptor` is empty but `record_type_name` is set (not
21 | // recommended), `record_type_name` can be interpreted in the context of an
22 | // unspecified proto descriptor database.
23 | repeated google.protobuf.FileDescriptorProto file_descriptor = 3;
24 |
25 | // Options originally used to encode the file:
26 | // https://github.com/google/riegeli/blob/master/doc/record_writer_options.md
27 | //
28 | // They are informative only; they are never necessary to decode the file.
29 | optional string record_writer_options = 4;
30 |
31 | // Number of records in the file, so that the reader can tune for it.
32 | //
33 | // This is informative only; the actual number of records may differ.
34 | optional int64 num_records = 5;
35 |
36 | // Clients can define custom metadata in extensions of this message.
37 | extensions 1000 to max;
38 | }
39 |
--------------------------------------------------------------------------------
/riegeli/bytes/fd_internal.h:
--------------------------------------------------------------------------------
1 | // Copyright 2023 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_BYTES_FD_INTERNAL_H_
16 | #define RIEGELI_BYTES_FD_INTERNAL_H_
17 |
18 | #ifndef __APPLE__
19 | #include
20 | #endif
21 |
22 | #include
23 |
24 | namespace riegeli::fd_internal {
25 |
26 | // Infers a filename from fd by reading the symlink target for
27 | // `absl::StrCat("/proc/self/fd/", fd)` (on Windows returns a
28 | // `absl::StrCat("<fd ", fd, ">")` placeholder instead).
29 | std::string FilenameForFd(int fd);
30 |
31 | #ifndef _WIN32
32 | #ifndef __APPLE__
33 | inline constexpr int kCloseOnExec = O_CLOEXEC;  // Non-Apple, non-Windows systems.
34 | #else // __APPLE__
35 | // On Darwin `O_CLOEXEC` is available conditionally, so `kCloseOnExec` is
36 | // defined out of line.
37 | extern const int kCloseOnExec;
38 | #endif // __APPLE__
39 | #else // _WIN32
40 | inline constexpr int kCloseOnExec = _O_NOINHERIT;  // Windows uses `_O_NOINHERIT` instead.
41 | #endif // _WIN32
42 |
43 | } // namespace riegeli::fd_internal
44 |
45 | #endif // RIEGELI_BYTES_FD_INTERNAL_H_
46 |
--------------------------------------------------------------------------------
/riegeli/chunk_encoding/decompressor.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "riegeli/chunk_encoding/decompressor.h"
16 |
17 | #include
18 | #include
19 |
20 | #include
21 |
22 | #include "absl/base/optimization.h"
23 | #include "riegeli/base/chain.h"
24 | #include "riegeli/bytes/chain_reader.h"
25 | #include "riegeli/chunk_encoding/constants.h"
26 | #include "riegeli/varint/varint_reading.h"
27 |
28 | namespace riegeli::chunk_encoding_internal {
29 |
30 | std::optional UncompressedSize(const Chain& compressed_data,
31 | CompressionType compression_type) {
32 | if (compression_type == CompressionType::kNone) return compressed_data.size();  // Not compressed: the data's own size.
33 | ChainReader<> compressed_data_reader(&compressed_data);  // Otherwise the size is read from the start of the data.
34 | uint64_t size;
35 | if (ABSL_PREDICT_FALSE(!ReadVarint64(compressed_data_reader, size))) {
36 | return std::nullopt;  // No valid varint64 could be read.
37 | }
38 | return size;
39 | }
40 |
41 | } // namespace riegeli::chunk_encoding_internal
42 |
--------------------------------------------------------------------------------
/riegeli/digests/highwayhash_digester.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2024 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "riegeli/digests/highwayhash_digester.h"
16 |
17 | #include "highwayhash/hh_types.h"
18 |
19 | namespace riegeli {
20 |
21 | template <>  // Each definition below is an explicit specialization of `kDefaultKey`.
22 | alignas(32) const HighwayHashKey
23 | HighwayHashDigester::kDefaultKey = {  // Four 64-bit words, aligned to 32 bytes.
24 | 0x4ea9929a25d561c6,
25 | 0x98470d187b523e8f,
26 | 0x592040a2da3c4b53,
27 | 0xbff8b246e3c587a2,
28 | };
29 |
30 | template <>
31 | alignas(32) const HighwayHashKey
32 | HighwayHashDigester::kDefaultKey = {  // A different key from the other two.
33 | 0x025ed8a16fb5f783,
34 | 0xb44bc74d89d26c86,
35 | 0x111ea964039fa769,
36 | 0x6f7d7159e15612b6,
37 | };
38 |
39 | template <>
40 | alignas(32) const HighwayHashKey
41 | HighwayHashDigester::kDefaultKey = {  // A different key from the other two.
42 | 0x93fee04321119357,
43 | 0x21e397ea62c264b6,
44 | 0x9d856914f2ad0e15,
45 | 0x64dca6f86247f384,
46 | };
47 |
48 | } // namespace riegeli
49 |
--------------------------------------------------------------------------------
/python/riegeli/tensorflow/BUILD:
--------------------------------------------------------------------------------
1 | load("@rules_cc//cc:defs.bzl", "cc_binary")
2 | load("@rules_python//python:defs.bzl", "py_library", "py_test")
3 |
4 | package(
5 | default_visibility = ["//visibility:public"],
6 | features = ["header_modules"],
7 | )
8 |
9 | licenses(["notice"])
10 |
11 | py_library(
12 | name = "riegeli_dataset_ops",
13 | srcs = ["ops/riegeli_dataset_ops.py"],
14 | data = [":ops/_riegeli_dataset_ops.so"],  # The shared library built by the cc_binary below.
15 | )
16 |
17 | cc_binary(
18 | name = "ops/_riegeli_dataset_ops.so",
19 | srcs = [
20 | "//riegeli/tensorflow:kernels/riegeli_dataset_ops.cc",
21 | "//riegeli/tensorflow:ops/riegeli_dataset_ops.cc",
22 | ],
23 | # tensorflow/core/lib/core/refcount.h needs NDEBUG consistency between
24 | # translation units.
25 | copts = ["-DNDEBUG"],
26 | linkshared = True,  # Build a shared library rather than an executable.
27 | deps = [
28 | "//riegeli/base:arithmetic",
29 | "//riegeli/records:record_position",
30 | "//riegeli/records:record_reader",
31 | "//riegeli/records:skipped_region",
32 | "//riegeli/tensorflow/io:file_reader",
33 | "@com_google_absl//absl/base:core_headers",
34 | "@com_google_absl//absl/status",
35 | "@com_google_absl//absl/strings:string_view",
36 | "@com_google_absl//absl/synchronization",
37 | "@local_config_tf//:libtensorflow_framework",
38 | "@local_config_tf//:tf_header_lib",
39 | ],
40 | )
41 |
42 | py_test(
43 | name = "riegeli_dataset_test",
44 | srcs = ["kernel_tests/riegeli_dataset_test.py"],
45 | deps = [
46 | ":riegeli_dataset_ops",  # The ops under test, defined above.
47 | "//python/riegeli",
48 | ],
49 | )
50 |
--------------------------------------------------------------------------------
/riegeli/lines/newline.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_LINES_NEWLINE_H_
16 | #define RIEGELI_LINES_NEWLINE_H_
17 |
18 | #include "absl/strings/string_view.h"
19 |
20 | namespace riegeli {
21 |
22 | // Which line terminators are recognized when reading.
23 | enum class ReadNewline {
24 |   kLf,        // Only LF ("\n")
25 |   kCrLfOrLf,  // Either CR-LF or LF ("\r\n" | "\n")
26 | };
27 |
28 | // Which line terminator is emitted when writing.
29 | enum class WriteNewline {
30 |   kLf,    // LF ("\n")
31 |   kCrLf,  // CR-LF ("\r\n")
32 |
33 | #ifdef _WIN32
34 |   kNative = kCrLf,
35 | #else
36 |   kNative = kLf,
37 | #endif
38 | };
39 |
40 | // The platform's native line terminator, spelled as a string.
41 | #ifdef _WIN32
42 | inline constexpr absl::string_view kNewline = "\r\n";
43 | #else
44 | inline constexpr absl::string_view kNewline = "\n";
45 | #endif
46 |
47 | // The UTF-8 byte order mark, spelled as a string.
48 | inline constexpr absl::string_view kUtf8Bom = "\xef\xbb\xbf";
49 |
50 | }  // namespace riegeli
51 |
52 | #endif // RIEGELI_LINES_NEWLINE_H_
53 |
--------------------------------------------------------------------------------
/riegeli/records/tools/riegeli_summary.proto:
--------------------------------------------------------------------------------
1 | syntax = "proto2";
2 |
3 | package riegeli.summary;
4 |
5 | import "riegeli/records/records_metadata.proto";
6 |
7 | // Summary of a Riegeli/records file contents.
8 | //
9 | // This is currently used merely to format the output of describe_riegeli_file
10 | // as structured data.
11 |
12 | enum ChunkType {
13 | FILE_SIGNATURE = 0x73;  // ASCII 's'
14 | FILE_METADATA = 0x6d;  // ASCII 'm'
15 | PADDING = 0x70;  // ASCII 'p'
16 | SIMPLE = 0x72;  // ASCII 'r'
17 | TRANSPOSED = 0x74;  // ASCII 't'
18 | }
19 |
20 | enum CompressionType {
21 | NONE = 0;
22 | BROTLI = 0x62;  // ASCII 'b'
23 | ZSTD = 0x7a;  // ASCII 'z'
24 | SNAPPY = 0x73;  // ASCII 's'
25 | }
26 |
27 | message SimpleChunk {  // Has the same fields as `TransposedChunk`.
28 | optional CompressionType compression_type = 1;
29 | repeated uint64 record_sizes = 2 [packed = true];
30 | repeated bytes records = 3;
31 | }
32 |
33 | message TransposedChunk {  // Has the same fields as `SimpleChunk`.
34 | optional CompressionType compression_type = 1;
35 | repeated uint64 record_sizes = 2 [packed = true];
36 | repeated bytes records = 3;
37 | }
38 |
39 | message Chunk {
40 | optional uint64 chunk_begin = 1;
41 | optional ChunkType chunk_type = 2;
42 | optional uint64 data_size = 3;
43 | optional uint64 num_records = 4;
44 | optional uint64 decoded_data_size = 5;
45 | oneof Data {  // At most one of the following is set.
46 | riegeli.RecordsMetadata file_metadata_chunk = 6;  // Type imported from records_metadata.proto.
47 | SimpleChunk simple_chunk = 7;
48 | TransposedChunk transposed_chunk = 8;
49 | }
50 | }
51 |
52 | // This is not used because each chunk is printed on the fly, so that the output
53 | // appears incrementally.
54 | //
55 | // message File {
56 | // optional string filename = 1;
57 | // optional uint64 file_size = 2;
58 | // repeated Chunk chunk = 3;
59 | // }
60 |
--------------------------------------------------------------------------------
/python/riegeli/records/skipped_region.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Details about a skipped region of invalid file contents."""
15 |
16 | __all__ = ('SkippedRegion',)
17 |
18 |
19 | class SkippedRegion:
20 | """Details about a skipped region of invalid file contents.
21 |
22 | Attributes:
23 | begin: File position of the beginning of the skipped region, inclusive.
24 | end: File position of the end of the skipped region, exclusive.
25 | length: Length of the skipped region, in bytes.
26 | message: Message explaining why the region is invalid.
27 | """
28 |
29 | __slots__ = ('begin', 'end', 'message')
30 |
31 | def __init__(self, begin, end, message):
32 | if begin > end:
33 | raise ValueError(f'Positions in the wrong order: {begin} > {end}')
34 | self.begin = begin
35 | self.end = end
36 | self.message = message
37 |
38 | @property
39 | def length(self):
40 | return self.end - self.begin
41 |
42 | def __str__(self):
43 | return f'[{self.begin}..{self.end}): {self.message}'
44 |
45 | def __repr__(self):
46 | return f'SkippedRegion({self.begin}, {self.end}, {self.message!r})'
47 |
--------------------------------------------------------------------------------
/python/riegeli/py_extension.bzl:
--------------------------------------------------------------------------------
1 | """Supports writing Python modules in C++."""
2 |
3 | load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library")
4 | load("@rules_python//python:defs.bzl", "py_library")
5 |
6 | def py_extension(
7 |         name = None,
8 |         srcs = None,
9 |         hdrs = None,
10 |         data = None,
11 |         features = None,
12 |         visibility = None,
13 |         deps = None):
14 |     """Defines a Python extension module whose implementation is C++.
15 |
16 |     Declares a cc_library named `name + "_cc"` (for other py_extensions), a
17 |     private shared library built from it, and a py_library named `name`.
18 |
19 |     Args:
20 |       name: Name for this target.
21 |       srcs: C++ source files.
22 |       hdrs: C++ header files, for other py_extensions which depend on this.
23 |       data: Files needed at runtime. This may include Python libraries.
24 |       features: Passed to cc_library.
25 |       visibility: Controls which rules can depend on this.
26 |       deps: Other C++ libraries that this library depends upon.
27 |     """
28 |
29 |     library_target = name + "_cc"
30 |     shared_object = name + ".so"
31 |     cc_library(
32 |         name = library_target,
33 |         hdrs = hdrs,
34 |         srcs = srcs,
35 |         deps = deps,
36 |         data = data,
37 |         features = features,
38 |         visibility = visibility,
39 |         alwayslink = True,
40 |     )
41 |     cc_binary(
42 |         name = shared_object,
43 |         deps = [library_target],
44 |         linkshared = True,
45 |         linkstatic = True,
46 |         visibility = ["//visibility:private"],
47 |     )
48 |
49 |     py_library(
50 |         name = name,
51 |         visibility = visibility,
52 |         data = [shared_object],
53 |     )
54 |
--------------------------------------------------------------------------------
/riegeli/lz4/lz4_dictionary.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | // Enables the experimental lz4 API:
16 | // * `LZ4F_createCDict()`
17 | // * `LZ4F_freeCDict()`
18 | #define LZ4F_STATIC_LINKING_ONLY
19 |
20 | #include "riegeli/lz4/lz4_dictionary.h"
21 |
22 | #include
23 |
24 | #include "absl/base/attributes.h"
25 | #include "absl/base/call_once.h"
26 | #include "absl/strings/string_view.h"
27 | #include "lz4frame.h"
28 | #include "riegeli/base/shared_ptr.h"
29 |
30 | namespace riegeli {
31 |
32 | void Lz4Dictionary::Repr::LZ4F_CDictDeleter::operator()(LZ4F_CDict* ptr) const {
33 |   LZ4F_freeCDict(ptr);  // Frees the dictionary through the lz4 API.
34 | }
35 |
36 | inline const LZ4F_CDict* Lz4Dictionary::Repr::PrepareCompressionDictionary()
37 |     const {
38 |   absl::call_once(compression_once_, [this] {  // Created lazily, on first use.
39 |     compression_dictionary_.reset(LZ4F_createCDict(data_.data(), data_.size()));
40 |   });
41 |   return compression_dictionary_.get();
42 | }
43 |
44 | const LZ4F_CDict* Lz4Dictionary::PrepareCompressionDictionary() const
45 |     ABSL_ATTRIBUTE_LIFETIME_BOUND {
46 |   // Without a representation there is no dictionary to prepare.
47 |   return repr_ == nullptr ? nullptr : repr_->PrepareCompressionDictionary();
48 | }
49 |
50 | }  // namespace riegeli
51 |
--------------------------------------------------------------------------------
/riegeli/digests/crc32_digester.h:
--------------------------------------------------------------------------------
1 | // Copyright 2023 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_DIGESTS_CRC32_DIGESTER_H_
16 | #define RIEGELI_DIGESTS_CRC32_DIGESTER_H_
17 |
18 | #include
19 |
20 | #include "absl/strings/string_view.h"
21 |
22 | namespace riegeli {
23 |
24 | // A digester computing CRC32 checksums, for `DigestingReader` and
25 | // `DigestingWriter`.
26 | //
27 | // This uses the polynomial x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 +
28 | // x^10 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1 (0x104c11db7).
29 | //
30 | // This polynomial is used e.g. by gzip, zip, and png:
31 | // https://en.wikipedia.org/wiki/Cyclic_redundancy_check#Polynomial_representations_of_cyclic_redundancy_checks
32 | class Crc32Digester {
33 | public:
34 | Crc32Digester() : Crc32Digester(0) {}  // Same as a seed of 0.
35 |
36 | explicit Crc32Digester(uint32_t seed);  // `seed` is the initial checksum value.
37 |
38 | Crc32Digester(const Crc32Digester& that) = default;
39 | Crc32Digester& operator=(const Crc32Digester& that) = default;
40 |
41 | void Write(absl::string_view src);  // Updates the checksum with `src`.
42 | uint32_t Digest() { return crc_; }  // Returns the checksum accumulated so far.
43 |
44 | private:
45 | uint32_t crc_;  // The running checksum.
46 | };
47 |
48 | } // namespace riegeli
49 |
50 | #endif // RIEGELI_DIGESTS_CRC32_DIGESTER_H_
51 |
--------------------------------------------------------------------------------
/configure:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2019 Google LLC
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | set -e  # Abort on the first failing command.
17 | # Records the Python interpreter and, if importable, TensorFlow paths in configure.bazelrc.
18 | PYTHON_BIN_PATH=$(command -v python || true)  # `|| true`: a missing python must not trip `set -e`.
19 |
20 | if [[ -n $PYTHON_BIN_PATH ]] && "$PYTHON_BIN_PATH" -c "import tensorflow" &>/dev/null; then
21 |   TF_CFLAGS=$("$PYTHON_BIN_PATH" -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))')
22 |   TF_LFLAGS=$("$PYTHON_BIN_PATH" -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))')
23 |   # Header dir: first compile flag minus `-I`. Library dir: first link flag minus `-L`.
24 |   TF_HEADER_DIR=${TF_CFLAGS%% *}
25 |   TF_HEADER_DIR=${TF_HEADER_DIR#-I}
26 |   TF_SHARED_LIBRARY_DIR=${TF_LFLAGS%% *}
27 |   TF_SHARED_LIBRARY_DIR=${TF_SHARED_LIBRARY_DIR#-L}
28 |   TF_SHARED_LIBRARY_NAME=${TF_LFLAGS##* -l:}  # Text after the last ` -l:`.
29 | else
30 |   TF_HEADER_DIR=
31 |   TF_SHARED_LIBRARY_DIR=
32 |   TF_SHARED_LIBRARY_NAME=
33 | fi
34 | # The file is written in both cases; without TensorFlow the TF_* values are empty.
35 | {
36 |   printf 'build --action_env PYTHON_BIN_PATH="%s"\n' "$PYTHON_BIN_PATH"
37 |   printf 'build --action_env TF_HEADER_DIR="%s"\n' "$TF_HEADER_DIR"
38 |   printf 'build --action_env TF_SHARED_LIBRARY_DIR="%s"\n' "$TF_SHARED_LIBRARY_DIR"
39 |   printf 'build --action_env TF_SHARED_LIBRARY_NAME="%s"\n' "$TF_SHARED_LIBRARY_NAME"
40 | } >configure.bazelrc
41 |
42 | echo "Set up configure.bazelrc. Make sure to include it in your .bazelrc file."
43 |
--------------------------------------------------------------------------------
/riegeli/base/stream_utils.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "riegeli/base/stream_utils.h"
16 |
17 | #include
18 |
19 | #include
20 | #include
21 | #include
22 | #include
23 |
24 | #include "absl/base/nullability.h"
25 |
26 | ABSL_POINTERS_DEFAULT_NONNULL
27 |
28 | namespace riegeli {
29 |
30 | void WritePadding(std::ostream& dest, size_t length, char fill) {  // Writes `length` copies of `fill` to `dest`.
31 | char buffer[64];  // Filled with `fill` once, then written as many times as needed.
32 | std::memset(buffer, fill, sizeof(buffer));
33 | while (length > sizeof(buffer)) {  // Whole buffers, while more than one buffer's worth remains.
34 | dest.write(buffer, std::streamsize{sizeof(buffer)});
35 | length -= sizeof(buffer);
36 | }
37 | dest.write(buffer, static_cast(length));  // The remainder, at most `sizeof(buffer)` bytes.
38 | }
39 |
40 | int StringifyOStream::StringStreambuf::overflow(int src) {
41 | if (src != traits_type::eof()) dest_->push_back(static_cast(src));  // EOF itself is not appended.
42 | return traits_type::not_eof(src);  // Never returns EOF, i.e. never reports failure.
43 | }
44 |
45 | std::streamsize StringifyOStream::StringStreambuf::xsputn(
46 | const char* absl_nullable src, std::streamsize length) {
47 | assert(length >= 0);
48 | dest_->append(src, static_cast(length));  // Appends all `length` bytes to `*dest_`.
49 | return length;  // Reports that everything was written.
50 | }
51 |
52 | } // namespace riegeli
53 |
--------------------------------------------------------------------------------
/riegeli/base/closing_ptr.h:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_BASE_CLOSING_PTR_H_
16 | #define RIEGELI_BASE_CLOSING_PTR_H_
17 |
18 | #include
19 |
20 | #include "absl/base/attributes.h"
21 | #include "absl/base/nullability.h"
22 |
23 | ABSL_POINTERS_DEFAULT_NONNULL
24 |
25 | namespace riegeli {
26 |
27 | // A deleter for `std::unique_ptr` which does nothing.
28 | struct NullDeleter {
29 | template
30 | void operator()(ABSL_ATTRIBUTE_UNUSED T* ptr) const {}
31 | };
32 |
33 | // Marks the pointer with the intent to transfer the responsibility to close the
34 | // object when done with the pointer, even though the object is not moved nor
35 | // destroyed.
36 | //
37 | // In the context of `Dependency` and `Any`, passing `ClosingPtr(&m)`
38 | // instead of `std::move(m)` avoids moving `m`, but the caller must ensure that
39 | // the dependent object is valid while the host object needs it.
40 |
41 | template
42 | using ClosingPtrType = std::unique_ptr;
43 |
44 | template
45 | inline ClosingPtrType ClosingPtr(T* ptr) {
46 | return ClosingPtrType(ptr);
47 | }
48 |
49 | } // namespace riegeli
50 |
51 | #endif // RIEGELI_BASE_CLOSING_PTR_H_
52 |
--------------------------------------------------------------------------------
/riegeli/base/parallelism.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_BASE_PARALLELISM_H_
16 | #define RIEGELI_BASE_PARALLELISM_H_
17 |
18 | #include
19 |
20 | #include
21 |
22 | #include "absl/base/attributes.h"
23 | #include "absl/base/thread_annotations.h"
24 | #include "absl/functional/any_invocable.h"
25 | #include "absl/synchronization/mutex.h"
26 |
27 | namespace riegeli::internal {
28 |
29 | // A thread pool with lazily created worker threads, without a thread count
30 | // limit. Worker threads exit after being idle for one minute.
31 | class ThreadPool {
32 | public:
33 | ThreadPool() {}
34 |
35 | ThreadPool(const ThreadPool&) = delete;
36 | ThreadPool& operator=(const ThreadPool&) = delete;
37 |
38 | ~ThreadPool();
39 |
40 | static ThreadPool& global();
41 |
42 | void Schedule(absl::AnyInvocable task);
43 |
44 | private:
45 | absl::Mutex mutex_;
46 | bool exiting_ ABSL_GUARDED_BY(mutex_) = false;
47 | size_t num_threads_ ABSL_GUARDED_BY(mutex_) = 0;
48 | size_t num_idle_threads_ ABSL_GUARDED_BY(mutex_) = 0;
49 | std::deque> tasks_ ABSL_GUARDED_BY(mutex_);
50 | };
51 |
52 | } // namespace riegeli::internal
53 |
54 | #endif // RIEGELI_BASE_PARALLELISM_H_
55 |
--------------------------------------------------------------------------------
/python/riegeli/records/record_position.h:
--------------------------------------------------------------------------------
1 | // Copyright 2019 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef PYTHON_RIEGELI_RECORDS_RECORD_POSITION_H_
16 | #define PYTHON_RIEGELI_RECORDS_RECORD_POSITION_H_
17 |
18 | // From https://docs.python.org/3/c-api/intro.html:
19 | // Since Python may define some pre-processor definitions which affect the
20 | // standard headers on some systems, you must include Python.h before any
21 | // standard headers are included.
22 | #include
23 | // clang-format: do not reorder the above include.
24 |
25 | #include
26 |
27 | #include "python/riegeli/base/utils.h"
28 | #include "riegeli/records/record_position.h"
29 |
30 | namespace riegeli::python {
31 |
32 | // Access the API thus:
33 | // ```
34 | // static constexpr ImportedCapsule kRecordPositionApi(
35 | // kRecordPositionCapsuleName);
36 | // ```
37 |
38 | struct RecordPositionApi {
39 | PythonPtr (*RecordPositionToPython)(FutureRecordPosition value);
40 | std::optional (*RecordPositionFromPython)(PyObject* object);
41 | };
42 |
43 | inline constexpr const char* kRecordPositionCapsuleName =
44 | "riegeli.records.record_position._CPPAPI";
45 |
46 | } // namespace riegeli::python
47 |
48 | #endif // PYTHON_RIEGELI_RECORDS_RECORD_POSITION_H_
49 |
--------------------------------------------------------------------------------
/riegeli/base/ownership.h:
--------------------------------------------------------------------------------
1 | // Copyright 2024 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_BASE_OWNERSHIP_H_
16 | #define RIEGELI_BASE_OWNERSHIP_H_
17 |
18 | #include
19 |
20 | namespace riegeli {
21 |
22 | // `PassOwnership` and `ShareOwnership` type tags specify how ownership of a
23 | // potentially shared object is transferred, for cases when this is not implied
24 | // by parameter types.
25 | //
26 | // * `PassOwnership`: the original owner drops its reference. The reference
27 | // count is decreased unless the new owner gets a reference instead.
28 | //
29 | // * `ShareOwnership`: The original owner keeps its reference. The reference
30 | // count is increased if the new owner also gets a reference.
31 |
// Tag type and tag value selecting the "pass ownership" behavior.
struct PassOwnership {};
inline constexpr PassOwnership kPassOwnership = {};

// Tag type and tag value selecting the "share ownership" behavior.
struct ShareOwnership {};
inline constexpr ShareOwnership kShareOwnership = {};

// `IsOwnership<T>::value` is `true` if `T` is `PassOwnership` or
// `ShareOwnership`.

// Primary template: an arbitrary `T` is not an ownership tag.
template <typename T>
struct IsOwnership : std::false_type {};

template <>
struct IsOwnership<PassOwnership> : std::true_type {};

template <>
struct IsOwnership<ShareOwnership> : std::true_type {};
49 |
50 | } // namespace riegeli
51 |
52 | #endif // RIEGELI_BASE_OWNERSHIP_H_
53 |
--------------------------------------------------------------------------------
/riegeli/chunk_encoding/brotli_encoder_selection.h:
--------------------------------------------------------------------------------
1 | // Copyright 2024 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_CHUNK_ENCODING_BROTLI_ENCODER_SELECTION_H_
16 | #define RIEGELI_CHUNK_ENCODING_BROTLI_ENCODER_SELECTION_H_
17 |
18 | #include
19 |
20 | #include "riegeli/base/chain.h"
21 | #include "riegeli/base/recycling_pool.h"
22 | #include "riegeli/bytes/writer.h"
23 | #include "riegeli/chunk_encoding/compressor_options.h"
24 |
25 | namespace riegeli::chunk_encoding_internal {
26 |
27 | // Creates a `Writer` which compresses data with Brotli and writes them to
28 | // `compressed`.
29 | //
30 | // The encoder implementation is determined by
31 | // `compressor_options.brotli_encoder()`.
32 | //
33 | // This is a weak function. Its default definition supports only C Brotli.
34 | // It can be overridden to support also Rust Brotli.
35 | std::unique_ptr NewBrotliWriter(
36 | Chain* compressed, const CompressorOptions& compressor_options,
37 | const RecyclingPoolOptions& recycling_pool_options);
38 |
39 | // Support for `NewBrotliWriter()`: uses C Brotli, ignores
40 | // `compressor_options.brotli_encoder()`.
41 | std::unique_ptr NewCBrotliWriter(
42 | Chain* compressed, const CompressorOptions& compressor_options);
43 |
44 | } // namespace riegeli::chunk_encoding_internal
45 |
46 | #endif // RIEGELI_CHUNK_ENCODING_BROTLI_ENCODER_SELECTION_H_
47 |
--------------------------------------------------------------------------------
/riegeli/snappy/hadoop/BUILD:
--------------------------------------------------------------------------------
1 | load("@rules_cc//cc:defs.bzl", "cc_library")
2 |
3 | package(
4 | default_visibility = ["//visibility:public"],
5 | features = ["header_modules"],
6 | )
7 |
8 | licenses(["notice"])
9 |
10 | cc_library(
11 | name = "hadoop_snappy_reader",
12 | srcs = ["hadoop_snappy_reader.cc"],
13 | hdrs = ["hadoop_snappy_reader.h"],
14 | deps = [
15 | "//riegeli/base:arithmetic",
16 | "//riegeli/base:assert",
17 | "//riegeli/base:buffer",
18 | "//riegeli/base:dependency",
19 | "//riegeli/base:initializer",
20 | "//riegeli/base:object",
21 | "//riegeli/base:status",
22 | "//riegeli/base:types",
23 | "//riegeli/bytes:pullable_reader",
24 | "//riegeli/bytes:reader",
25 | "//riegeli/endian:endian_reading",
26 | "@com_google_absl//absl/base:core_headers",
27 | "@com_google_absl//absl/status",
28 | "@com_google_absl//absl/strings",
29 | "@com_google_absl//absl/strings:string_view",
30 | "@snappy",
31 | ],
32 | )
33 |
34 | cc_library(
35 | name = "hadoop_snappy_writer",
36 | srcs = ["hadoop_snappy_writer.cc"],
37 | hdrs = ["hadoop_snappy_writer.h"],
38 | deps = [
39 | ":hadoop_snappy_reader",
40 | "//riegeli/base:arithmetic",
41 | "//riegeli/base:assert",
42 | "//riegeli/base:buffer",
43 | "//riegeli/base:buffering",
44 | "//riegeli/base:dependency",
45 | "//riegeli/base:initializer",
46 | "//riegeli/base:object",
47 | "//riegeli/base:status",
48 | "//riegeli/base:types",
49 | "//riegeli/bytes:pushable_writer",
50 | "//riegeli/bytes:reader",
51 | "//riegeli/bytes:writer",
52 | "//riegeli/endian:endian_writing",
53 | "@com_google_absl//absl/base:core_headers",
54 | "@com_google_absl//absl/status",
55 | "@com_google_absl//absl/strings",
56 | "@snappy",
57 | ],
58 | )
59 |
--------------------------------------------------------------------------------
/riegeli/bytes/iostream_internal.h:
--------------------------------------------------------------------------------
1 | // Copyright 2019 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_BYTES_IOSTREAM_INTERNAL_H_
16 | #define RIEGELI_BYTES_IOSTREAM_INTERNAL_H_
17 |
18 | #include
19 | #include
20 | #include
21 |
22 | namespace riegeli::iostream_internal {
23 |
24 | // There is no `std::istream::close()` nor `std::ostream::close()`, but some
25 | // subclasses have `close()`, e.g. `std::ifstream`, `std::ofstream`,
26 | // `std::fstream`. It is important to call `close()` before their destructor
27 | // to detect errors.
28 | //
29 | // `iostream_internal::Close(stream)` calls `stream->close()` if that is
30 | // defined, otherwise does nothing.
31 |
// `HasClose<T>::value` is `true` if `close()` can be called on an lvalue of
// type `T`.
template <typename T, typename Enable = void>
struct HasClose : std::false_type {};

template <typename T>
struct HasClose<T, std::void_t<decltype(std::declval<T&>().close())>>
    : std::true_type {};

// Calls `stream.close()` if `Stream` has a `close()` member, otherwise does
// nothing.
template <typename Stream>
inline void Close(Stream& stream) {
  if constexpr (HasClose<Stream>::value) {
    stream.close();
  }
}
45 |
// Returns `stream` converted to `std::istream*` if `T` derives from
// `std::istream`, otherwise returns `nullptr`.
template <typename T>
inline std::istream* DetectIStream(T* stream) {
  if constexpr (std::is_base_of_v<std::istream, T>) {
    return stream;
  } else {
    return nullptr;
  }
}
54 |
55 | } // namespace riegeli::iostream_internal
56 |
57 | #endif // RIEGELI_BYTES_IOSTREAM_INTERNAL_H_
58 |
--------------------------------------------------------------------------------
/riegeli/chunk_encoding/chunk_encoder.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "riegeli/chunk_encoding/chunk_encoder.h"
16 |
17 | #include
18 |
19 | #include "absl/base/optimization.h"
20 | #include "absl/status/status.h"
21 | #include "absl/strings/cord.h"
22 | #include "google/protobuf/message_lite.h"
23 | #include "riegeli/base/chain.h"
24 | #include "riegeli/messages/serialize_message.h"
25 |
26 | namespace riegeli {
27 |
28 | void ChunkEncoder::Done() {
29 | num_records_ = 0;
30 | decoded_data_size_ = 0;
31 | }
32 |
33 | bool ChunkEncoder::AddRecord(const google::protobuf::MessageLite& record,
34 | SerializeMessageOptions serialize_options) {
35 | if (ABSL_PREDICT_FALSE(!ok())) return false;
36 | Chain serialized;
37 | if (absl::Status status =
38 | SerializeMessage(record, serialized, serialize_options);
39 | ABSL_PREDICT_FALSE(!status.ok())) {
40 | return Fail(std::move(status));
41 | }
42 | return AddRecord(std::move(serialized));
43 | }
44 |
45 | bool ChunkEncoder::AddRecord(Chain&& record) {
46 | // Not `std::move(record)`: forward to `AddRecord(const Chain&)`.
47 | return AddRecord(record);
48 | }
49 |
50 | bool ChunkEncoder::AddRecord(absl::Cord&& record) {
51 | // Not `std::move(record)`: forward to `AddRecord(const absl::Cord&)`.
52 | return AddRecord(record);
53 | }
54 |
55 | } // namespace riegeli
56 |
--------------------------------------------------------------------------------
/riegeli/records/tools/tfrecord_recognizer.h:
--------------------------------------------------------------------------------
1 | #include "absl/base/attributes.h"
2 | // Copyright 2018 Google LLC
3 | //
4 | // Licensed under the Apache License, Version 2.0 (the "License");
5 | // you may not use this file except in compliance with the License.
6 | // You may obtain a copy of the License at
7 | //
8 | // http://www.apache.org/licenses/LICENSE-2.0
9 | //
10 | // Unless required by applicable law or agreed to in writing, software
11 | // distributed under the License is distributed on an "AS IS" BASIS,
12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | // See the License for the specific language governing permissions and
14 | // limitations under the License.
15 |
16 | #ifndef RIEGELI_RECORDS_TOOLS_TFRECORD_DETECTOR_H_
17 | #define RIEGELI_RECORDS_TOOLS_TFRECORD_DETECTOR_H_
18 |
19 | #include "riegeli/base/assert.h"
20 | #include "riegeli/base/object.h"
21 | #include "riegeli/bytes/reader.h"
22 | #include "tensorflow/core/lib/io/record_reader.h"
23 |
24 | namespace riegeli {
25 |
26 | class TFRecordRecognizer : public Object {
27 | public:
28 | explicit TFRecordRecognizer(
29 | Reader* byte_reader ABSL_ATTRIBUTE_LIFETIME_BOUND);
30 |
31 | // Ensures that the file looks like a valid TFRecord file.
32 | //
33 | // Updates `record_reader_options` on success.
34 | //
35 | // Return values:
36 | // * `true` - success (`record_reader_options` is updated)
37 | // * `false` (when `ok()`) - source ends
38 | // * `false` (when `!ok()`) - failure
39 | bool CheckFileFormat(
40 | tensorflow::io::RecordReaderOptions& record_reader_options);
41 |
42 | private:
43 | Reader* byte_reader_;
44 | };
45 |
46 | // Implementation details follow.
47 |
48 | inline TFRecordRecognizer::TFRecordRecognizer(
49 | Reader* byte_reader ABSL_ATTRIBUTE_LIFETIME_BOUND)
50 | : byte_reader_(RIEGELI_EVAL_ASSERT_NOTNULL(byte_reader)) {}
51 |
52 | } // namespace riegeli
53 |
54 | #endif // RIEGELI_RECORDS_TOOLS_TFRECORD_DETECTOR_H_
55 |
--------------------------------------------------------------------------------
/riegeli/brotli/brotli_dictionary.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "riegeli/brotli/brotli_dictionary.h"
16 |
17 | #include
18 | #include
19 |
20 | #include
21 |
22 | #include "absl/base/attributes.h"
23 | #include "absl/base/call_once.h"
24 | #include "absl/strings/string_view.h"
25 | #include "brotli/encode.h"
26 | #include "brotli/shared_dictionary.h"
27 | #include "riegeli/base/assert.h"
28 |
29 | namespace riegeli {
30 |
31 | const BrotliEncoderPreparedDictionary*
32 | BrotliDictionary::Chunk::PrepareCompressionDictionary() const
33 | ABSL_ATTRIBUTE_LIFETIME_BOUND {
34 | absl::call_once(compression_once_, [&] {
35 | if (type_ == Type::kNative) {
36 | RIEGELI_ASSERT_NE(compression_dictionary_, nullptr)
37 | << "Failed invariant of BrotliDictionary::Chunk: "
38 | "unprepared native chunk";
39 | return;
40 | }
41 | owned_compression_dictionary_.reset(BrotliEncoderPrepareDictionary(
42 | static_cast(type_), data_.size(),
43 | reinterpret_cast(data_.data()), BROTLI_MAX_QUALITY,
44 | // `BrotliAllocator` is not supported here because the prepared
45 | // dictionary may easily outlive the allocator.
46 | nullptr, nullptr, nullptr));
47 | compression_dictionary_ = owned_compression_dictionary_.get();
48 | });
49 | return compression_dictionary_;
50 | }
51 |
52 | } // namespace riegeli
53 |
--------------------------------------------------------------------------------
/riegeli/base/assert.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "riegeli/base/assert.h"
16 |
17 | #include
18 |
19 | #include "absl/base/nullability.h"
20 | #include "absl/log/absl_log.h"
21 | #include "riegeli/base/stream_utils.h"
22 |
23 | ABSL_POINTERS_DEFAULT_NONNULL
24 |
25 | namespace riegeli::assert_internal {
26 |
27 | CheckResult::CheckResult(const char* function, const char* prefix)
28 | : header_(new StringOStream(new std::string())) {
29 | header() << "Check failed in " << function << ": " << prefix;
30 | }
31 |
32 | CheckFailed::CheckFailed(const char* file, int line, CheckResult check_result)
33 | : file_(file),
34 | line_(line),
35 | check_result_(check_result),
36 | details_(new StringOStream(new std::string())) {}
37 |
38 | CheckFailed::~CheckFailed() {
39 | if (!details_->dest()->empty()) {
40 | check_result_.header() << "; " << *details_->dest();
41 | }
42 | ABSL_LOG(FATAL).AtLocation(file_, line_) << *check_result_.header().dest();
43 | }
44 |
45 | void CheckNotNullFailed(const char* file, int line, const char* function,
46 | const char* expression) {
47 | CheckResult check_result(function, expression);
48 | check_result.header() << " != nullptr";
49 | CheckFailed check_failed(file, line, check_result);
50 | }
51 |
52 | CheckResult CheckImpossibleResult(const char* function) {
53 | return CheckResult(function, "Impossible");
54 | }
55 |
56 | } // namespace riegeli::assert_internal
57 |
--------------------------------------------------------------------------------
/riegeli/base/estimated_allocated_size.h:
--------------------------------------------------------------------------------
1 | // Copyright 2019 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_BASE_ESTIMATED_ALLOCATED_SIZE_H_
16 | #define RIEGELI_BASE_ESTIMATED_ALLOCATED_SIZE_H_
17 |
18 | #include
19 |
20 | #include "absl/base/attributes.h"
21 | #include "absl/base/nullability.h"
22 | #include "riegeli/base/arithmetic.h"
23 |
24 | ABSL_POINTERS_DEFAULT_NONNULL
25 |
26 | namespace riegeli {
27 |
28 | // Returns the estimated size which will be allocated when requesting to
29 | // allocate `requested_size`.
30 | inline size_t EstimatedAllocatedSize(size_t requested_size) {
31 | // Placeholder for asking the memory manager, which might be possible on some
32 | // platforms.
33 | return RoundUp<2 * sizeof(void*)>(
34 | UnsignedMax(requested_size, 4 * sizeof(void*)));
35 | }
36 |
37 | // Returns the estimated size which was allocated at `ptr` when requested to
38 | // allocate `requested_size`.
39 | inline size_t EstimatedAllocatedSize(ABSL_ATTRIBUTE_UNUSED const void* ptr,
40 | size_t requested_size) {
41 | // Placeholder for using `ptr`, which might be possible on some platforms.
42 | return EstimatedAllocatedSize(requested_size);
43 | }
44 |
// A deterministic variant of `EstimatedAllocatedSize()`, useful for testing.
//
// Always returns `requested_size` plus a fixed overhead of 16 bytes.
inline size_t EstimatedAllocatedSizeForTesting(size_t requested_size) {
  return 16 + requested_size;
}
49 |
50 | } // namespace riegeli
51 |
52 | #endif // RIEGELI_BASE_ESTIMATED_ALLOCATED_SIZE_H_
53 |
--------------------------------------------------------------------------------
/riegeli/bytes/fd_internal.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2023 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef _WIN32
16 |
17 | // Make `readlink()` available, and make `O_CLOEXEC` available on Darwin.
18 | #if !defined(_XOPEN_SOURCE) || _XOPEN_SOURCE < 700
19 | #undef _XOPEN_SOURCE
20 | #define _XOPEN_SOURCE 700
21 | #endif
22 |
23 | #endif
24 |
25 | #include "riegeli/bytes/fd_internal.h"
26 |
27 | #ifdef __APPLE__
28 | #include
29 | #endif
30 | #ifndef _WIN32
31 | #include
32 | #include
33 | #endif
34 |
35 | #include
36 |
37 | #ifndef _WIN32
38 | #include "absl/base/optimization.h"
39 | #endif
40 | #include "absl/strings/str_cat.h"
41 | #ifndef _WIN32
42 | #include "riegeli/base/arithmetic.h"
43 | #include "riegeli/base/buffer.h"
44 | #endif
45 |
46 | namespace riegeli::fd_internal {
47 |
48 | std::string FilenameForFd(int fd) {
49 | #ifndef _WIN32
50 | std::string filename = absl::StrCat("/proc/self/fd/", fd);
51 | Buffer buffer(PATH_MAX);
52 | const ssize_t length = readlink(filename.c_str(), buffer.data(), PATH_MAX);
53 | if (ABSL_PREDICT_TRUE(length >= 0)) {
54 | filename.assign(buffer.data(), IntCast(length));
55 | }
56 | return filename;
57 | #else // _WIN32
58 | return absl::StrCat("");
59 | #endif // _WIN32
60 | }
61 |
62 | #ifdef __APPLE__
63 | // On Darwin `O_CLOEXEC` is available conditionally, so `kCloseOnExec` is
64 | // defined out of line.
65 | extern const int kCloseOnExec = O_CLOEXEC;
66 | #endif // __APPLE__
67 |
68 | } // namespace riegeli::fd_internal
69 |
--------------------------------------------------------------------------------
/riegeli/gcs/gcs_internal.h:
--------------------------------------------------------------------------------
1 | // Copyright 2023 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_GCS_GCS_INTERNAL_H_
16 | #define RIEGELI_GCS_GCS_INTERNAL_H_
17 |
18 | #include "absl/base/attributes.h"
19 | #include "absl/status/status.h"
20 | #include "google/cloud/status.h"
21 |
22 | namespace riegeli::gcs_internal {
23 |
24 | inline absl::Status FromCloudStatus(const google::cloud::Status& status) {
25 | return absl::Status(static_cast(status.code()),
26 | status.message());
27 | }
28 |
29 | template
30 | T GetOption() {
31 | return T();
32 | }
33 | template
34 | const T& GetOption(const T& option) {
35 | return option;
36 | }
37 | template
38 | const T& GetOption(ABSL_ATTRIBUTE_UNUSED const T& previous_option,
39 | const T& option, const Options&... options) {
40 | return GetOption(option, options...);
41 | }
42 | template
43 | const T& GetOption(const T& option,
44 | ABSL_ATTRIBUTE_UNUSED const Other& other_option,
45 | const Options&... options) {
46 | return GetOption(option, options...);
47 | }
48 | template
49 | auto GetOption(ABSL_ATTRIBUTE_UNUSED const Other& other_option,
50 | const Options&... options) {
51 | return GetOption(options...);
52 | }
53 |
54 | } // namespace riegeli::gcs_internal
55 |
56 | #endif // RIEGELI_GCS_GCS_INTERNAL_H_
57 |
--------------------------------------------------------------------------------
/riegeli/messages/message_wire_format.h:
--------------------------------------------------------------------------------
1 | // Copyright 2020 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_MESSAGES_MESSAGE_WIRE_FORMAT_H_
16 | #define RIEGELI_MESSAGES_MESSAGE_WIRE_FORMAT_H_
17 |
18 | #include
19 |
20 | namespace riegeli {
21 |
22 | // Low level functions for writing and reading serialized proto messages
23 | // directly.
24 | //
25 | // They mostly correspond to selected members of
26 | // `google::protobuf::internal::WireFormatLite`.
27 |
28 | // The part of a field tag which denotes the representation of the field value
29 | // which follows the tag.
// Wire type values, stored in the low 3 bits of a field tag.
enum class WireType : uint32_t {
  kVarint = 0,
  kFixed32 = 5,
  kFixed64 = 1,
  kLengthDelimited = 2,
  kStartGroup = 3,
  kEndGroup = 4,
  kInvalid6 = 6,
  kInvalid7 = 7,
};

// Composes/decomposes a field tag.
constexpr uint32_t MakeTag(int field_number, WireType wire_type);
constexpr WireType GetTagWireType(uint32_t tag);
constexpr int GetTagFieldNumber(uint32_t tag);

// Implementation details follow.

// The tag is the field number shifted left by 3 bits, combined with the wire
// type in the low 3 bits.
constexpr uint32_t MakeTag(int field_number, WireType wire_type) {
  return (static_cast<uint32_t>(field_number) << 3) |
         static_cast<uint32_t>(wire_type);
}

// Extracts the low 3 bits of `tag`.
constexpr WireType GetTagWireType(uint32_t tag) {
  return static_cast<WireType>(tag & 7);
}

// Extracts the bits of `tag` above the low 3 bits.
constexpr int GetTagFieldNumber(uint32_t tag) {
  return static_cast<int>(tag >> 3);
}
60 |
61 | } // namespace riegeli
62 |
63 | #endif // RIEGELI_MESSAGES_MESSAGE_WIRE_FORMAT_H_
64 |
--------------------------------------------------------------------------------
/riegeli/base/external_data.h:
--------------------------------------------------------------------------------
1 | // Copyright 2024 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_BASE_EXTERNAL_DATA_H_
16 | #define RIEGELI_BASE_EXTERNAL_DATA_H_
17 |
18 | #include
19 | #include
20 |
21 | #include "absl/strings/string_view.h"
22 |
23 | namespace riegeli {
24 |
// Type-erased external object with its deleter.
//
// `ExternalStorage` can be decomposed with `void* ExternalStorage::release()`
// and `ExternalStorage::get_deleter() -> void (*)(void*)`.
using ExternalStorage = std::unique_ptr<void, void (*)(void*)>;

// Supports `ExternalRef`.
//
// Moves the storage out of `*self`, leaving `*self` empty.
inline ExternalStorage RiegeliToExternalStorage(ExternalStorage* self) {
  return std::move(*self);
}
35 |
// Type-erased external object with its deleter and a substring of a byte array
// it owns.
//
// `storage` keeps the external object alive; `substr` is the view handed out
// to users and is only valid while `storage` is alive.
struct ExternalData {
  // Implicit conversion which yields `substr`. The returned view does not
  // extend the lifetime of `storage`.
  /*implicit*/ operator absl::string_view() const { return substr; }

  // Indicates support for:
  //  * `ExternalRef(ExternalData&&)`
  //  * `ExternalRef(ExternalData&&, substr)`
  friend void RiegeliSupportsExternalRef(ExternalData*) {}

  // Supports `ExternalRef`: moves `storage` out of `*self`. `self->substr` is
  // left unchanged.
  friend ExternalStorage RiegeliToExternalStorage(ExternalData* self) {
    return std::move(self->storage);
  }

  ExternalStorage storage;  // Must outlive usages of `substr`.
  absl::string_view substr;
};
54 |
55 | // Creates `ExternalData` holding a copy of `data`.
56 | ExternalData ExternalDataCopy(absl::string_view data);
57 |
58 | } // namespace riegeli
59 |
60 | #endif // RIEGELI_BASE_EXTERNAL_DATA_H_
61 |
--------------------------------------------------------------------------------
/python/riegeli/bytes/BUILD:
--------------------------------------------------------------------------------
1 | load("@rules_cc//cc:defs.bzl", "cc_library")
2 |
3 | package(
4 | default_visibility = ["//python/riegeli:__subpackages__"],
5 | features = ["header_modules"],
6 | )
7 |
8 | licenses(["notice"])
9 |
10 | cc_library(
11 | name = "python_reader",
12 | srcs = ["python_reader.cc"],
13 | hdrs = ["python_reader.h"],
14 | # python_reader.cc has #define before #include to influence what the
15 | # included files provide.
16 | features = ["-use_header_modules"],
17 | deps = [
18 | "//python/riegeli/base:utils",
19 | "//riegeli/base:arithmetic",
20 | "//riegeli/base:assert",
21 | "//riegeli/base:global",
22 | "//riegeli/base:object",
23 | "//riegeli/base:types",
24 | "//riegeli/bytes:buffer_options",
25 | "//riegeli/bytes:buffered_reader",
26 | "@com_google_absl//absl/base:core_headers",
27 | "@com_google_absl//absl/numeric:bits",
28 | "@com_google_absl//absl/status",
29 | "@com_google_absl//absl/strings",
30 | "@com_google_absl//absl/strings:string_view",
31 | "@com_google_absl//absl/types:span",
32 | "@rules_python//python/cc:current_py_cc_headers",
33 | ],
34 | )
35 |
36 | cc_library(
37 | name = "python_writer",
38 | srcs = ["python_writer.cc"],
39 | hdrs = ["python_writer.h"],
40 | # python_writer.cc has #define before #include to influence what the
41 | # included files provide.
42 | features = ["-use_header_modules"],
43 | deps = [
44 | "//python/riegeli/base:utils",
45 | "//riegeli/base:arithmetic",
46 | "//riegeli/base:assert",
47 | "//riegeli/base:global",
48 | "//riegeli/base:object",
49 | "//riegeli/base:types",
50 | "//riegeli/bytes:buffer_options",
51 | "//riegeli/bytes:buffered_writer",
52 | "@com_google_absl//absl/base:core_headers",
53 | "@com_google_absl//absl/numeric:bits",
54 | "@com_google_absl//absl/status",
55 | "@com_google_absl//absl/strings",
56 | "@com_google_absl//absl/strings:string_view",
57 | "@rules_python//python/cc:current_py_cc_headers",
58 | ],
59 | )
60 |
--------------------------------------------------------------------------------
/python/riegeli/records/examples/write_read_records.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Simple example which writes and reads a Riegeli/records file."""
15 |
16 | import io
17 |
18 | import riegeli
19 | from riegeli.records.tests import records_test_pb2
20 |
21 |
def sample_string(i, size):
    """Returns `size` bytes made of the decimal text of `i` and a space, repeated.

    The pattern is repeated often enough to cover `size` bytes and then cut to
    exactly `size` bytes, so the last repetition may be truncated.
    """
    pattern = f'{i} '.encode()
    # Ceiling division: a partial repetition still needs a whole copy.
    repetitions, leftover = divmod(size, len(pattern))
    if leftover:
        repetitions += 1
    repeated = pattern * repetitions  # len(repeated) >= size
    return repeated[:size]
26 |
27 |
def sample_message(i, size):
    """Returns a `SimpleMessage` with id `i` and a `size`-byte sample payload."""
    payload = sample_string(i, size)
    return records_test_pb2.SimpleMessage(id=i, payload=payload)
30 |
31 |
def write_records(filename):
    """Writes 100 sample messages to a Riegeli/records file at `filename`.

    The file metadata records `SimpleMessage` as the record type, and the
    writer is configured with the 'transpose' option.
    """
    print('Writing', filename)
    metadata = riegeli.RecordsMetadata()
    riegeli.set_record_type(metadata, records_test_pb2.SimpleMessage)
    dest = io.FileIO(filename, mode='wb')
    messages = (sample_message(i, 100) for i in range(100))
    with riegeli.RecordWriter(
        dest, options='transpose', metadata=metadata
    ) as writer:
        writer.write_messages(messages)
40 |
41 |
def read_records(filename):
    """Reads the file at `filename` and prints the record ids on one line.

    Only the `id` field of `SimpleMessage` is requested through the field
    projection.
    """
    print('Reading', filename)
    src = io.FileIO(filename, mode='rb')
    id_field = records_test_pb2.SimpleMessage.DESCRIPTOR.fields_by_name['id']
    projection = [[id_field.number]]
    with riegeli.RecordReader(src, field_projection=projection) as reader:
        records = reader.read_messages(records_test_pb2.SimpleMessage)
        ids = [str(record.id) for record in records]
        print(' '.join(ids))
56 |
57 |
def main():
    """Writes a sample Riegeli/records file and reads it back.

    The file is created inside a fresh private temporary directory instead of
    a fixed, predictable path in the shared `/tmp` directory. This also works
    on platforms without `/tmp`, and the directory is removed when the example
    finishes.
    """
    # Imported locally so that this function does not depend on changes to
    # the module-level imports.
    import os
    import tempfile

    with tempfile.TemporaryDirectory() as temp_dir:
        filename = os.path.join(temp_dir, 'riegeli_example')
        write_records(filename)
        read_records(filename)
62 |
63 |
64 | if __name__ == '__main__':
65 | main()
66 |
--------------------------------------------------------------------------------
/riegeli/snappy/framed/BUILD:
--------------------------------------------------------------------------------
1 | load("@rules_cc//cc:defs.bzl", "cc_library")
2 |
3 | package(
4 | default_visibility = ["//visibility:public"],
5 | features = ["header_modules"],
6 | )
7 |
8 | licenses(["notice"])
9 |
10 | cc_library(
11 | name = "framed_snappy_reader",
12 | srcs = ["framed_snappy_reader.cc"],
13 | hdrs = ["framed_snappy_reader.h"],
14 | deps = [
15 | "//riegeli/base:arithmetic",
16 | "//riegeli/base:assert",
17 | "//riegeli/base:buffer",
18 | "//riegeli/base:dependency",
19 | "//riegeli/base:initializer",
20 | "//riegeli/base:moving_dependency",
21 | "//riegeli/base:object",
22 | "//riegeli/base:status",
23 | "//riegeli/base:types",
24 | "//riegeli/bytes:pullable_reader",
25 | "//riegeli/bytes:reader",
26 | "//riegeli/endian:endian_reading",
27 | "@com_google_absl//absl/base:core_headers",
28 | "@com_google_absl//absl/crc:crc32c",
29 | "@com_google_absl//absl/status",
30 | "@com_google_absl//absl/strings",
31 | "@com_google_absl//absl/strings:string_view",
32 | "@snappy",
33 | ],
34 | )
35 |
36 | cc_library(
37 | name = "framed_snappy_writer",
38 | srcs = ["framed_snappy_writer.cc"],
39 | hdrs = ["framed_snappy_writer.h"],
40 | deps = [
41 | ":framed_snappy_reader",
42 | "//riegeli/base:arithmetic",
43 | "//riegeli/base:assert",
44 | "//riegeli/base:buffer",
45 | "//riegeli/base:buffering",
46 | "//riegeli/base:dependency",
47 | "//riegeli/base:initializer",
48 | "//riegeli/base:object",
49 | "//riegeli/base:status",
50 | "//riegeli/base:types",
51 | "//riegeli/bytes:pushable_writer",
52 | "//riegeli/bytes:reader",
53 | "//riegeli/bytes:writer",
54 | "//riegeli/digests:crc32c_digester",
55 | "//riegeli/digests:digesting_writer",
56 | "//riegeli/endian:endian_writing",
57 | "@com_google_absl//absl/base:core_headers",
58 | "@com_google_absl//absl/status",
59 | "@com_google_absl//absl/strings",
60 | "@com_google_absl//absl/strings:string_view",
61 | "@snappy",
62 | ],
63 | )
64 |
--------------------------------------------------------------------------------
/riegeli/digests/openssl_digester.h:
--------------------------------------------------------------------------------
1 | // Copyright 2023 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_DIGESTS_OPENSSL_DIGESTER_H_
16 | #define RIEGELI_DIGESTS_OPENSSL_DIGESTER_H_
17 |
18 | #include
19 | #include
20 |
21 | #include
22 |
23 | #include "absl/base/attributes.h"
24 | #include "absl/strings/string_view.h"
25 |
26 | namespace riegeli {
27 |
28 | // A digester template computing checksums implemented by OpenSSL, for
29 | // `DigestingReader` and `DigestingWriter`.
30 | template
32 | class OpenSslDigester {
33 | public:
34 | OpenSslDigester() { init(&ctx_); }
35 |
36 | OpenSslDigester(const OpenSslDigester& that) = default;
37 | OpenSslDigester& operator=(const OpenSslDigester& that) = default;
38 |
39 | ABSL_ATTRIBUTE_REINITIALIZES void Reset() {
40 | init(&ctx_);
41 | is_open_ = true;
42 | }
43 |
44 | void Write(absl::string_view src) { update(&ctx_, src.data(), src.size()); }
45 |
46 | void Close() {
47 | if (is_open_) {
48 | final(reinterpret_cast(digest_.data()), &ctx_);
49 | is_open_ = false;
50 | }
51 | }
52 |
53 | std::array Digest() {
54 | if (is_open_) {
55 | H copy = ctx_;
56 | final(reinterpret_cast(digest_.data()), ©);
57 | }
58 | return digest_;
59 | }
60 |
61 | private:
62 | H ctx_;
63 | std::array digest_;
64 | bool is_open_ = true;
65 | };
66 |
67 | } // namespace riegeli
68 |
69 | #endif // RIEGELI_DIGESTS_OPENSSL_DIGESTER_H_
70 |
--------------------------------------------------------------------------------
/MODULE.bazel:
--------------------------------------------------------------------------------
1 | module(
2 | name = "riegeli",
3 | repo_name = "com_google_riegeli",
4 | )
5 |
6 | bazel_dep(
7 | name = "abseil-cpp",
8 | version = "20250814.0",
9 | repo_name = "com_google_absl",
10 | )
11 | bazel_dep(
12 | name = "abseil-py",
13 | version = "2.1.0",
14 | repo_name = "absl_py",
15 | )
16 | bazel_dep(
17 | name = "bazel_skylib",
18 | version = "1.7.1",
19 | )
20 | bazel_dep(
21 | name = "boringssl",
22 | version = "0.0.0-20240530-2db0eb3",
23 | )
24 | bazel_dep(
25 | name = "brotli",
26 | version = "1.1.0",
27 | repo_name = "org_brotli",
28 | )
29 | bazel_dep(
30 | name = "bzip2",
31 | version = "1.0.8",
32 | )
33 | bazel_dep(
34 | name = "highwayhash",
35 | version = "0.0.0-20240305-5ad3bf8",
36 | )
37 | bazel_dep(
38 | name = "lz4",
39 | version = "1.9.4",
40 | )
41 | bazel_dep(
42 | name = "platforms",
43 | version = "0.0.9",
44 | )
45 | bazel_dep(
46 | name = "protobuf",
47 | version = "28.3",
48 | repo_name = "com_google_protobuf",
49 | )
50 | bazel_dep(
51 | name = "rules_cc",
52 | version = "0.1.2",
53 | )
54 | bazel_dep(
55 | name = "rules_python",
56 | version = "0.36.0",
57 | )
58 | bazel_dep(
59 | name = "snappy",
60 | version = "1.2.0",
61 | )
62 | bazel_dep(
63 | name = "xz",
64 | version = "5.4.5.bcr.1",
65 | )
66 | bazel_dep(
67 | name = "zlib",
68 | version = "1.3.1.bcr.3",
69 | )
70 | bazel_dep(
71 | name = "zstd",
72 | version = "1.5.6",
73 | repo_name = "net_zstd",
74 | )
75 | bazel_dep(
76 | name = "google_cloud_cpp",
77 | version = "3.0.0-rc1",
78 | )
79 |
# Configure hermetic Python toolchain.
#
# One toolchain is registered for each version listed below. The list is kept
# in ascending order because its last entry becomes the default version.
SUPPORTED_PYTHON_VERSIONS = [
    "3.8",
    "3.9",
    "3.10",
    "3.11",
    "3.12",
]

# The last (newest) supported version is the default toolchain.
DEFAULT_PYTHON_VERSION = SUPPORTED_PYTHON_VERSIONS[-1]

python = use_extension("@rules_python//python/extensions:python.bzl", "python")

# Register every supported version; only `DEFAULT_PYTHON_VERSION` is marked
# as the default.
[
    python.toolchain(
        is_default = version == DEFAULT_PYTHON_VERSION,
        python_version = version,
    )
    for version in SUPPORTED_PYTHON_VERSIONS
]
100 |
--------------------------------------------------------------------------------
/riegeli/bzip2/BUILD:
--------------------------------------------------------------------------------
1 | load("@rules_cc//cc:defs.bzl", "cc_library")
2 |
3 | package(
4 | default_visibility = ["//visibility:public"],
5 | features = ["header_modules"],
6 | )
7 |
8 | licenses(["notice"])
9 |
10 | cc_library(
11 | name = "bzip2_reader",
12 | srcs = ["bzip2_reader.cc"],
13 | hdrs = ["bzip2_reader.h"],
14 | deps = [
15 | ":bzip2_error",
16 | "//riegeli/base:arithmetic",
17 | "//riegeli/base:assert",
18 | "//riegeli/base:dependency",
19 | "//riegeli/base:initializer",
20 | "//riegeli/base:object",
21 | "//riegeli/base:status",
22 | "//riegeli/base:types",
23 | "//riegeli/bytes:buffer_options",
24 | "//riegeli/bytes:buffered_reader",
25 | "//riegeli/bytes:reader",
26 | "@bzip2//:bz2",
27 | "@com_google_absl//absl/base:core_headers",
28 | "@com_google_absl//absl/status",
29 | "@com_google_absl//absl/strings",
30 | "@com_google_absl//absl/strings:string_view",
31 | ],
32 | )
33 |
34 | cc_library(
35 | name = "bzip2_writer",
36 | srcs = ["bzip2_writer.cc"],
37 | hdrs = ["bzip2_writer.h"],
38 | deps = [
39 | ":bzip2_error",
40 | "//riegeli/base:arithmetic",
41 | "//riegeli/base:assert",
42 | "//riegeli/base:dependency",
43 | "//riegeli/base:initializer",
44 | "//riegeli/base:object",
45 | "//riegeli/base:status",
46 | "//riegeli/base:types",
47 | "//riegeli/bytes:buffer_options",
48 | "//riegeli/bytes:buffered_writer",
49 | "//riegeli/bytes:writer",
50 | "@bzip2//:bz2",
51 | "@com_google_absl//absl/base:core_headers",
52 | "@com_google_absl//absl/status",
53 | "@com_google_absl//absl/strings",
54 | "@com_google_absl//absl/strings:string_view",
55 | ],
56 | )
57 |
58 | cc_library(
59 | name = "bzip2_error",
60 | srcs = ["bzip2_error.cc"],
61 | hdrs = ["bzip2_error.h"],
62 | visibility = ["//visibility:private"],
63 | deps = [
64 | "//riegeli/base:assert",
65 | "@bzip2//:bz2",
66 | "@com_google_absl//absl/status",
67 | "@com_google_absl//absl/strings",
68 | "@com_google_absl//absl/strings:string_view",
69 | ],
70 | )
71 |
--------------------------------------------------------------------------------
/riegeli/bytes/string_reader.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "riegeli/bytes/string_reader.h"
16 |
17 | #include
18 |
19 | #include
20 | #include
21 |
22 | #include "absl/base/optimization.h"
23 | #include "riegeli/base/assert.h"
24 | #include "riegeli/base/types.h"
25 | #include "riegeli/bytes/reader.h"
26 |
27 | namespace riegeli {
28 |
// Slow path of `Pull()`, called when fewer than `min_length` bytes are
// available.
//
// Always returns `false` without changing the reader: this implementation
// never makes more data available than what is already in the buffer.
bool StringReaderBase::PullSlow(size_t min_length, size_t recommended_length) {
  RIEGELI_ASSERT_LT(available(), min_length)
      << "Failed precondition of Reader::PullSlow(): "
         "enough data available, use Pull() instead";
  return false;
}
35 |
// Slow path of `Seek()`, called when `new_pos` lies outside of the buffer.
//
// Returns `false` in all cases. If the reader is OK, the cursor is moved to
// the end of the buffer first.
bool StringReaderBase::SeekSlow(Position new_pos) {
  RIEGELI_ASSERT(new_pos < start_pos() || new_pos > limit_pos())
      << "Failed precondition of Reader::SeekSlow(): "
         "position in the buffer, use Seek() instead";
  if (ABSL_PREDICT_FALSE(!ok())) return false;
  // The buffer starts at position 0, so `new_pos < start_pos()` is impossible
  // and the precondition above implies `new_pos > limit_pos()`.
  RIEGELI_ASSERT_EQ(start_pos(), 0u)
      << "Failed invariant of StringReader: non-zero position of buffer start";
  // Seeking forwards. Source ends.
  set_cursor(limit());
  return false;
}
47 |
48 | std::optional StringReaderBase::SizeImpl() {
49 | if (ABSL_PREDICT_FALSE(!ok())) return std::nullopt;
50 | return limit_pos();
51 | }
52 |
53 | std::unique_ptr StringReaderBase::NewReaderImpl(Position initial_pos) {
54 | if (ABSL_PREDICT_FALSE(!ok())) return nullptr;
55 | // `NewReaderImpl()` is thread-safe from this point.
56 | std::unique_ptr reader =
57 | std::make_unique>(start(), start_to_limit());
58 | reader->Seek(initial_pos);
59 | return reader;
60 | }
61 |
62 | } // namespace riegeli
63 |
--------------------------------------------------------------------------------
/riegeli/chunk_encoding/hash.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "riegeli/chunk_encoding/hash.h"
16 |
17 | #include
18 |
19 | #include
20 |
21 | #include "absl/container/inlined_vector.h"
22 | #include "absl/strings/string_view.h"
23 | #include "highwayhash/hh_types.h"
24 | #include "highwayhash/highwayhash_target.h"
25 | #include "highwayhash/instruction_sets.h"
26 | #include "riegeli/base/chain.h"
27 |
28 | namespace riegeli::chunk_encoding_internal {
29 |
30 | namespace {
31 |
// Hash key passed to HighwayHash by the `Hash()` functions below.
//
// The four words spell the ASCII text "Riegeli/records\n" twice, each word
// holding 8 characters with the first character in the least significant byte.
alignas(32) const highwayhash::HHKey kHashKey = {
    0x2f696c6567656952,  // 'Riegeli/'
    0x0a7364726f636572,  // 'records\n'
    0x2f696c6567656952,  // 'Riegeli/'
    0x0a7364726f636572,  // 'records\n'
};
38 |
39 | } // namespace
40 |
41 | uint64_t Hash(absl::string_view data) {
42 | highwayhash::HHResult64 result;
43 | highwayhash::InstructionSets::Run(
44 | kHashKey, data.data(), data.size(), &result);
45 | return result;
46 | }
47 |
48 | uint64_t Hash(const Chain& data) {
49 | if (const std::optional flat = data.TryFlat();
50 | flat != std::nullopt) {
51 | return Hash(*flat);
52 | }
53 | absl::InlinedVector fragments;
54 | fragments.reserve(data.blocks().size());
55 | for (const absl::string_view fragment : data.blocks()) {
56 | fragments.push_back(
57 | highwayhash::StringView{fragment.data(), fragment.size()});
58 | }
59 | highwayhash::HHResult64 result;
60 | highwayhash::InstructionSets::Run(
61 | kHashKey, fragments.data(), fragments.size(), &result);
62 | return result;
63 | }
64 |
65 | } // namespace riegeli::chunk_encoding_internal
66 |
--------------------------------------------------------------------------------
/riegeli/chunk_encoding/chunk.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "riegeli/chunk_encoding/chunk.h"
16 |
17 | #include
18 |
19 | #include "absl/base/optimization.h"
20 | #include "absl/strings/string_view.h"
21 | #include "riegeli/base/assert.h"
22 | #include "riegeli/base/chain.h"
23 | #include "riegeli/bytes/reader.h"
24 | #include "riegeli/bytes/writer.h"
25 | #include "riegeli/chunk_encoding/constants.h"
26 | #include "riegeli/chunk_encoding/hash.h"
27 |
28 | namespace riegeli {
29 |
// Builds the header of a chunk whose payload is `data`.
//
// Stores the size and hash of `data`, the chunk type together with the number
// of records, the decoded data size, and finally the hash of the header
// itself.
//
// Precondition: `num_records <= kMaxNumRecords`.
ChunkHeader::ChunkHeader(const Chain& data, ChunkType chunk_type,
                         uint64_t num_records, uint64_t decoded_data_size) {
  RIEGELI_ASSERT_LE(num_records, kMaxNumRecords)
      << "Failed precondition of ChunkHeader::ChunkHeader(): "
         "number of records out of range";
  set_data_size(data.size());
  set_data_hash(chunk_encoding_internal::Hash(data));
  set_chunk_type_and_num_records(chunk_type, num_records);
  set_decoded_data_size(decoded_data_size);
  // Must stay last: `computed_header_hash()` hashes the header bytes after
  // the first `sizeof(uint64_t)` ones, which presumably hold the fields set
  // above.
  set_header_hash(computed_header_hash());
}
41 |
42 | uint64_t ChunkHeader::computed_header_hash() const {
43 | return chunk_encoding_internal::Hash(
44 | absl::string_view(bytes() + sizeof(uint64_t), size() - sizeof(uint64_t)));
45 | }
46 |
47 | bool Chunk::WriteTo(Writer& dest) const {
48 | if (ABSL_PREDICT_FALSE(
49 | !dest.Write(absl::string_view(header.bytes(), header.size())))) {
50 | return false;
51 | }
52 | return dest.Write(data);
53 | }
54 |
55 | bool Chunk::ReadFrom(Reader& src) {
56 | if (ABSL_PREDICT_FALSE(!src.Read(header.size(), header.bytes()))) {
57 | data.Clear();
58 | return false;
59 | }
60 | return src.Read(header.data_size(), data);
61 | }
62 |
63 | } // namespace riegeli
64 |
--------------------------------------------------------------------------------
/riegeli/base/types.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_BASE_TYPES_H_
16 | #define RIEGELI_BASE_TYPES_H_
17 |
18 | #include
19 | #include
20 |
21 | #include
22 | #include
23 |
24 | #include "absl/base/nullability.h"
25 |
26 | ABSL_POINTERS_DEFAULT_NONNULL
27 |
28 | namespace riegeli {
29 |
// Position in a stream of bytes, used also for stream sizes.
//
// This is an unsigned integer type at least as wide as `size_t`,
// `std::streamoff`, and `uint64_t`.
//
// NOTE(review): the leading template arguments were missing in this copy of
// the file (text between angle brackets was stripped). They are restored here
// from the description above: `std::streamoff` is signed, hence
// `std::make_unsigned_t`.
using Position =
    std::common_type_t<size_t, std::make_unsigned_t<std::streamoff>, uint64_t>;
36 |
// Specifies the scope of objects to flush and the intended data durability
// (without a guarantee).
//
// Enumerators are listed from the narrowest scope to the widest, with
// increasing numeric values.
enum class FlushType {
  // Makes data written so far visible in other objects, propagating flushing
  // through owned dependencies of the given writer.
  kFromObject = 0,
  // Makes data written so far visible outside the process, propagating flushing
  // through dependencies of the given writer. This is generally the default.
  kFromProcess = 1,
  // Makes data written so far visible outside the process and durable in case
  // of operating system crash, propagating flushing through dependencies of the
  // given writer.
  kFromMachine = 2,
};
51 |
// Specifies the scope of objects to synchronize.
//
// The enumerator names and values match the first two enumerators of
// `FlushType`.
enum class SyncType {
  // Propagates synchronization through owned dependencies of the given reader.
  kFromObject = 0,
  // Propagates synchronization through all dependencies of the given reader.
  // This is generally the default.
  kFromProcess = 1,
};
60 |
61 | } // namespace riegeli
62 |
63 | #endif // RIEGELI_BASE_TYPES_H_
64 |
--------------------------------------------------------------------------------
/riegeli/containers/BUILD:
--------------------------------------------------------------------------------
1 | load("@rules_cc//cc:defs.bzl", "cc_library")
2 |
3 | package(
4 | default_visibility = ["//visibility:public"],
5 | features = ["header_modules"],
6 | )
7 |
8 | licenses(["notice"])
9 |
10 | cc_library(
11 | name = "linear_sorted_string_set",
12 | srcs = ["linear_sorted_string_set.cc"],
13 | hdrs = ["linear_sorted_string_set.h"],
14 | deps = [
15 | "//riegeli/base:arithmetic",
16 | "//riegeli/base:assert",
17 | "//riegeli/base:compact_string",
18 | "//riegeli/base:compare",
19 | "//riegeli/base:debug",
20 | "//riegeli/base:dependency",
21 | "//riegeli/base:iterable",
22 | "//riegeli/base:stream_utils",
23 | "//riegeli/bytes:compact_string_writer",
24 | "//riegeli/bytes:reader",
25 | "//riegeli/bytes:writer",
26 | "//riegeli/endian:endian_reading",
27 | "//riegeli/varint:varint_reading",
28 | "//riegeli/varint:varint_writing",
29 | "@com_google_absl//absl/base:core_headers",
30 | "@com_google_absl//absl/numeric:bits",
31 | "@com_google_absl//absl/status",
32 | "@com_google_absl//absl/status:statusor",
33 | "@com_google_absl//absl/strings",
34 | "@com_google_absl//absl/strings:string_view",
35 | ],
36 | )
37 |
38 | cc_library(
39 | name = "chunked_sorted_string_set",
40 | srcs = ["chunked_sorted_string_set.cc"],
41 | hdrs = ["chunked_sorted_string_set.h"],
42 | deps = [
43 | ":linear_sorted_string_set",
44 | "//riegeli/base:arithmetic",
45 | "//riegeli/base:assert",
46 | "//riegeli/base:binary_search",
47 | "//riegeli/base:compact_string",
48 | "//riegeli/base:compare",
49 | "//riegeli/base:debug",
50 | "//riegeli/base:dependency",
51 | "//riegeli/base:iterable",
52 | "//riegeli/base:memory_estimator",
53 | "//riegeli/bytes:reader",
54 | "//riegeli/bytes:writer",
55 | "//riegeli/varint:varint_reading",
56 | "//riegeli/varint:varint_writing",
57 | "@com_google_absl//absl/base:core_headers",
58 | "@com_google_absl//absl/container:inlined_vector",
59 | "@com_google_absl//absl/status",
60 | "@com_google_absl//absl/status:statusor",
61 | "@com_google_absl//absl/strings",
62 | "@com_google_absl//absl/strings:string_view",
63 | ],
64 | )
65 |
--------------------------------------------------------------------------------
/riegeli/base/memory_estimator.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "riegeli/base/memory_estimator.h"
16 |
17 | #ifdef __GXX_RTTI
18 | #include // IWYU pragma: keep
19 | #endif
20 | #include
21 |
22 | #include
23 | #include
24 | #include
25 | #include
26 | #include
27 | #include
28 |
29 | #include "absl/base/nullability.h"
30 | #include "absl/container/flat_hash_set.h"
31 |
32 | ABSL_POINTERS_DEFAULT_NONNULL
33 |
34 | namespace riegeli {
35 |
36 | bool MemoryEstimatorDefault::RegisterNodeImpl(const void* absl_nullable ptr) {
37 | return ptr != nullptr && objects_seen_.insert(ptr).second;
38 | }
39 |
// Records that some unknown type was encountered for which no
// `std::type_index` is available. `UnknownTypes()` then reports one extra
// placeholder entry.
void MemoryEstimatorReportingUnknownTypes::RegisterUnknownTypeImpl() {
  unknown_types_no_rtti_ = true;
}
43 |
// Records the unknown type identified by `index`, to be reported later by
// `UnknownTypes()`.
void MemoryEstimatorReportingUnknownTypes::RegisterUnknownTypeImpl(
    std::type_index index) {
  unknown_types_.insert(index);
}
48 |
49 | std::vector MemoryEstimatorReportingUnknownTypes::UnknownTypes()
50 | const {
51 | std::vector result;
52 | result.reserve((unknown_types_no_rtti_ ? 1 : 0) + unknown_types_.size());
53 | if (unknown_types_no_rtti_) result.emplace_back("");
54 | for (const std::type_index index : unknown_types_) {
55 | #ifdef __GXX_RTTI
56 | int status = 0;
57 | char* const absl_nullable demangled =
58 | abi::__cxa_demangle(index.name(), nullptr, nullptr, &status);
59 | if (status == 0 && demangled != nullptr) {
60 | result.emplace_back(demangled);
61 | std::free(demangled);
62 | continue;
63 | }
64 | #endif
65 | result.emplace_back(index.name());
66 | }
67 | std::sort(result.begin(), result.end());
68 | return result;
69 | }
70 |
71 | } // namespace riegeli
72 |
--------------------------------------------------------------------------------
/python/build_pip_package.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2018 Google LLC
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # Builds a pip package for riegeli.
17 | #
18 | # Usage (where DEST is where to write the output, e.g. ~/riegeli-dist):
19 | # $ bazel build -c opt python:build_pip_package
20 | # $ bazel-bin/python/build_pip_package --dest DEST --sdist --bdist
21 |
22 | set -e
23 |
# Succeeds iff $1 is an absolute path: either POSIX style (leading "/") or
# Windows style (a drive letter, ":", then "/" or "\").
function is_absolute {
  case "$1" in
    /*) return 0 ;;
    [a-zA-Z]:[/\\]*) return 0 ;;
    *) return 1 ;;
  esac
}
27 |
# Prints $1 as an absolute path, without a trailing newline.
#
# An absolute path is printed unchanged. A relative path is prefixed with the
# current directory, after dropping a leading "./".
function real_path() {
  local path=$1
  if ! is_absolute "$path"; then
    path="$PWD/${path#./}"
  fi
  printf "%s" "$path"
}
35 |
# Builds a source distribution into directory $1 by running
# python/setup.py from the current directory.
function build_sdist() {
  local out_dir=$1
  python python/setup.py sdist --dist-dir "$out_dir"
}
40 |
# Builds a binary wheel into directory $1.
#
# setup.py is run from inside the runfiles tree of the build_pip_package
# target; afterwards the previous working directory is restored with `cd -`.
function build_bdist() {
  local out_dir=$1
  local runfiles_python=bazel-bin/python/build_pip_package.runfiles/com_google_riegeli/python
  cd "$runfiles_python"
  python setup.py bdist_wheel --dist-dir "$out_dir"
  cd -
}
47 |
# Parses the command line and builds the requested distributions.
#
# Flags:
#   --dest DIRECTORY  required; output directory, created if missing
#   --sdist           build a source distribution
#   --bdist           build a binary wheel
#
# Exits with status 1 and a message on stderr if a flag is unknown, if --dest
# is missing or has no value, or if neither --sdist nor --bdist is given.
function main() {
  local dest=
  local sdist=false
  local bdist=false
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --dest)
        # Without this check a trailing "--dest" makes the final `shift` of
        # this iteration fail, which under `set -e` terminates the script
        # without any diagnostic.
        if [[ $# -lt 2 ]]; then
          printf "Missing value for flag: --dest\n" >&2
          exit 1
        fi
        dest=$(real_path "$2")
        shift
        ;;
      --sdist)
        sdist=true
        ;;
      --bdist)
        bdist=true
        ;;
      *)
        printf "Unknown flag: %s\n" "$1" >&2
        exit 1
        ;;
    esac
    shift
  done
  if [[ -z $dest ]]; then
    printf "Missing required flag: --dest DIRECTORY\n" >&2
    exit 1
  fi
  if [[ $sdist != true ]] && [[ $bdist != true ]]; then
    printf "Nothing to do: missing --sdist or --bdist\n" >&2
    exit 1
  fi
  mkdir -p -- "$dest"
  if [[ $sdist = true ]]; then
    build_sdist "$dest"
  fi
  if [[ $bdist = true ]]; then
    build_bdist "$dest"
  fi
}
82 |
83 | main "$@"
84 |
--------------------------------------------------------------------------------
/python/setup.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """PIP package setup for Riegeli."""
15 |
16 | import setuptools
17 | from setuptools import dist
18 |
# README.md is used as the long description of the package. It is decoded
# explicitly as UTF-8 so that the build does not depend on the default
# encoding of the locale.
with open('README.md', 'r', encoding='utf-8') as fh:
    long_description = fh.read()
21 |
22 |
class BinaryDistribution(dist.Distribution):
    """Distribution which always reports that it has extension modules.

    This class is needed in order to create OS specific wheels.
    """

    def has_ext_modules(self) -> bool:
        # Unconditionally true: see the class docstring.
        return True
28 |
29 |
# The supported Python range follows the toolchains configured in
# MODULE.bazel (3.8 - 3.12). The previous lower bound of 3.5 could not work:
# the packaged example code uses f-strings, which need Python 3.6 or newer.
#
# NOTE(review): MODULE.bazel builds against protobuf 28.3, while
# `install_requires` still pins 'protobuf<4'. Confirm whether this upper bound
# is still intended; it is left unchanged here.
setuptools.setup(
    name='riegeli',
    version='0.0.1',
    description='File format for storing a sequence of records',
    long_description=long_description,
    long_description_content_type='text/markdown',
    url='https://github.com/google/riegeli',
    author='Google LLC',
    author_email='compression-dev@google.com',
    license='Apache License, Version 2.0',
    python_requires='>=3.8,<4',
    install_requires=[
        'protobuf>=3.8.0,<4',
    ],
    extras_require={
        'tensorflow': ['tensorflow>=1.15,<3'],
    },
    packages=setuptools.find_packages(),
    include_package_data=True,
    package_data={'': ['**/*.so']},
    distclass=BinaryDistribution,
    classifiers=[
        'Programming Language :: Python',
        'Intended Audience :: Developers',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: 3.9',
        'Programming Language :: Python :: 3.10',
        'Programming Language :: Python :: 3.11',
        'Programming Language :: Python :: 3.12',
        'License :: OSI Approved :: Apache Software License',
        'Operating System :: OS Independent',
        'Topic :: Software Development :: Libraries',
        'Topic :: Software Development :: Libraries :: Python Modules',
    ],
)
65 |
--------------------------------------------------------------------------------
/riegeli/tensorflow/ops/riegeli_dataset_ops.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2019 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "tensorflow/core/framework/common_shape_fns.h"
16 | #include "tensorflow/core/framework/op.h"
17 | #include "tensorflow/core/framework/shape_inference.h"
18 |
19 | namespace riegeli::tensorflow {
20 |
// Registers the `RiegeliDataset` op: three inputs (`filenames`,
// `min_buffer_size`, `max_buffer_size`), one `variant` output named `handle`,
// marked as stateful. The shape function only validates input ranks.
REGISTER_OP("RiegeliDataset")
    .Input("filenames: string")
    .Input("min_buffer_size: int64")
    .Input("max_buffer_size: int64")
    .Output("handle: variant")
    .SetIsStateful()
    .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
      // Receives the validated shapes; they are not needed afterwards.
      ::tensorflow::shape_inference::ShapeHandle unused;
      // `filenames` must be a scalar or a vector.
      TF_RETURN_IF_ERROR(c->WithRankAtMost(c->input(0), 1, &unused));
      // `min_buffer_size` must be a scalar.
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
      // `max_buffer_size` must be a scalar.
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      // The output shape is delegated to `ScalarShape()`.
      return ::tensorflow::shape_inference::ScalarShape(c);
    })
    .Doc(R"doc(
Creates a dataset that emits the records from one or more Riegeli/records files.

filenames: A scalar or vector containing the name(s) of the file(s) to be
read.
min_buffer_size: Tunes the minimal buffer size, which determines how much data
at a time is typically read from the file. The actual buffer size changes
between min_buffer_size and max_buffer_size depending on the access pattern.
max_buffer_size: Tunes the maximal buffer size, which determines how much data
at a time is typically read from the file. The actual buffer size changes
between min_buffer_size and max_buffer_size depending on the access pattern.
)doc");
49 |
50 | } // namespace riegeli::tensorflow
51 |
--------------------------------------------------------------------------------
/riegeli/bytes/fd_internal_for_cc.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_BYTES_FD_INTERNAL_FOR_CC_H_
16 | #define RIEGELI_BYTES_FD_INTERNAL_FOR_CC_H_
17 |
18 | // Warning: Do not include this header in other headers, because the definition
19 | // of `off_t` depends on `_FILE_OFFSET_BITS` which can reliably be set only
20 | // in a standalone compilation unit.
21 |
22 | #ifdef _WIN32
23 | #include
24 | #endif
25 | #include
26 | #include
27 | #ifndef _WIN32
28 | #include
29 | #endif
30 |
31 | #include "absl/strings/string_view.h"
32 |
// Platform-neutral names for file offset and file status primitives. Each
// platform branch defines the same set: `Offset`, `LSeek()`,
// `kLSeekFunctionName`, `StatInfo`, `FStat()`, `kFStatFunctionName`.
namespace riegeli::fd_internal {

#ifndef _WIN32

// File offset type accepted and returned by `LSeek()`.
using Offset = off_t;

// Forwards to `lseek()` and returns its result unchanged.
inline Offset LSeek(int fd, Offset offset, int whence) {
  return lseek(fd, offset, whence);
}

// Name of the function called by `LSeek()` (presumably for error messages;
// confirm against callers).
inline constexpr absl::string_view kLSeekFunctionName = "lseek()";

// File status structure filled by `FStat()`.
using StatInfo = struct stat;

// Forwards to `fstat()` and returns its result unchanged.
inline int FStat(int fd, StatInfo* stat_info) { return fstat(fd, stat_info); }

// Name of the function called by `FStat()` (presumably for error messages;
// confirm against callers).
inline constexpr absl::string_view kFStatFunctionName = "fstat()";

#else  // _WIN32

// File offset type accepted and returned by `LSeek()`.
using Offset = __int64;

// Forwards to `_lseeki64()` and returns its result unchanged.
inline Offset LSeek(int fd, Offset offset, int whence) {
  return _lseeki64(fd, offset, whence);
}

// Name of the function called by `LSeek()` (presumably for error messages;
// confirm against callers).
inline constexpr absl::string_view kLSeekFunctionName = "_lseeki64()";

// `struct __stat64` in a namespace does not work in MSVC due to a bug regarding
// https://en.cppreference.com/w/cpp/language/elaborated_type_specifier.
using StatInfo = struct ::__stat64;

// Forwards to `_fstat64()` and returns its result unchanged.
inline int FStat(int fd, StatInfo* stat_info) {
  return _fstat64(fd, stat_info);
}

// Name of the function called by `FStat()` (presumably for error messages;
// confirm against callers).
inline constexpr absl::string_view kFStatFunctionName = "_fstat64()";

#endif  // _WIN32

}  // namespace riegeli::fd_internal
74 |
75 | #endif // RIEGELI_BYTES_FD_INTERNAL_FOR_CC_H_
76 |
--------------------------------------------------------------------------------
/riegeli/xz/BUILD:
--------------------------------------------------------------------------------
1 | load("@rules_cc//cc:defs.bzl", "cc_library")
2 |
3 | package(
4 | default_visibility = ["//visibility:public"],
5 | features = ["header_modules"],
6 | )
7 |
8 | licenses(["notice"])
9 |
10 | cc_library(
11 | name = "xz_reader",
12 | srcs = ["xz_reader.cc"],
13 | hdrs = ["xz_reader.h"],
14 | deps = [
15 | ":xz_error",
16 | "//riegeli/base:arithmetic",
17 | "//riegeli/base:assert",
18 | "//riegeli/base:compare",
19 | "//riegeli/base:dependency",
20 | "//riegeli/base:initializer",
21 | "//riegeli/base:object",
22 | "//riegeli/base:recycling_pool",
23 | "//riegeli/base:status",
24 | "//riegeli/base:types",
25 | "//riegeli/bytes:buffer_options",
26 | "//riegeli/bytes:buffered_reader",
27 | "//riegeli/bytes:reader",
28 | "@com_google_absl//absl/base:core_headers",
29 | "@com_google_absl//absl/status",
30 | "@com_google_absl//absl/strings",
31 | "@com_google_absl//absl/strings:string_view",
32 | "@xz//:lzma",
33 | ],
34 | )
35 |
36 | cc_library(
37 | name = "xz_writer",
38 | srcs = ["xz_writer.cc"],
39 | hdrs = ["xz_writer.h"],
40 | deps = [
41 | ":xz_error",
42 | ":xz_reader",
43 | "//riegeli/base:arithmetic",
44 | "//riegeli/base:assert",
45 | "//riegeli/base:compare",
46 | "//riegeli/base:dependency",
47 | "//riegeli/base:initializer",
48 | "//riegeli/base:object",
49 | "//riegeli/base:recycling_pool",
50 | "//riegeli/base:status",
51 | "//riegeli/base:types",
52 | "//riegeli/bytes:buffer_options",
53 | "//riegeli/bytes:buffered_writer",
54 | "//riegeli/bytes:reader",
55 | "//riegeli/bytes:writer",
56 | "@com_google_absl//absl/base:core_headers",
57 | "@com_google_absl//absl/status",
58 | "@com_google_absl//absl/strings",
59 | "@com_google_absl//absl/strings:string_view",
60 | "@xz//:lzma",
61 | ],
62 | )
63 |
64 | cc_library(
65 | name = "xz_error",
66 | srcs = ["xz_error.cc"],
67 | hdrs = ["xz_error.h"],
68 | visibility = ["//visibility:private"],
69 | deps = [
70 | "//riegeli/base:assert",
71 | "@com_google_absl//absl/status",
72 | "@com_google_absl//absl/strings",
73 | "@com_google_absl//absl/strings:string_view",
74 | "@xz//:lzma",
75 | ],
76 | )
77 |
--------------------------------------------------------------------------------
/riegeli/text/BUILD:
--------------------------------------------------------------------------------
1 | load("@rules_cc//cc:defs.bzl", "cc_library")
2 |
3 | package(
4 | default_visibility = ["//visibility:public"],
5 | features = ["header_modules"],
6 | )
7 |
8 | licenses(["notice"])
9 |
10 | cc_library(
11 | name = "concat",
12 | hdrs = ["concat.h"],
13 | deps = [
14 | "//riegeli/base:initializer",
15 | "//riegeli/bytes:ostream_writer",
16 | "//riegeli/bytes:stringify",
17 | "//riegeli/bytes:stringify_writer",
18 | "//riegeli/bytes:writer",
19 | "@com_google_absl//absl/base:core_headers",
20 | ],
21 | )
22 |
23 | cc_library(
24 | name = "write_int",
25 | srcs = ["write_int.cc"],
26 | hdrs = ["write_int.h"],
27 | deps = [
28 | "//riegeli/base:arithmetic",
29 | "//riegeli/base:assert",
30 | "//riegeli/base:types",
31 | "//riegeli/bytes:ostream_writer",
32 | "//riegeli/bytes:write_int_internal",
33 | "//riegeli/bytes:writer",
34 | "//riegeli/endian:endian_writing",
35 | "@com_google_absl//absl/base:core_headers",
36 | "@com_google_absl//absl/numeric:bits",
37 | "@com_google_absl//absl/numeric:int128",
38 | "@com_google_absl//absl/strings:string_view",
39 | ],
40 | )
41 |
42 | cc_library(
43 | name = "ascii_align",
44 | hdrs = ["ascii_align.h"],
45 | deps = [
46 | ":concat",
47 | "//riegeli/base:arithmetic",
48 | "//riegeli/base:assert",
49 | "//riegeli/base:byte_fill",
50 | "//riegeli/base:chain",
51 | "//riegeli/base:initializer",
52 | "//riegeli/base:type_traits",
53 | "//riegeli/base:types",
54 | "//riegeli/bytes:ostream_writer",
55 | "//riegeli/bytes:restricted_chain_writer",
56 | "//riegeli/bytes:stringify",
57 | "//riegeli/bytes:stringify_writer",
58 | "//riegeli/bytes:writer",
59 | "@com_google_absl//absl/base:core_headers",
60 | ],
61 | )
62 |
63 | cc_library(
64 | name = "join",
65 | hdrs = ["join.h"],
66 | deps = [
67 | "//riegeli/base:initializer",
68 | "//riegeli/base:iterable",
69 | "//riegeli/base:types",
70 | "//riegeli/bytes:ostream_writer",
71 | "//riegeli/bytes:stringify",
72 | "//riegeli/bytes:stringify_writer",
73 | "//riegeli/bytes:writer",
74 | "@com_google_absl//absl/base:core_headers",
75 | "@com_google_absl//absl/strings:string_view",
76 | ],
77 | )
78 |
--------------------------------------------------------------------------------
/riegeli/chunk_encoding/brotli_encoder_selection.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2024 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "riegeli/chunk_encoding/brotli_encoder_selection.h"
16 |
17 | #include
18 |
19 | #include "absl/base/attributes.h"
20 | #include "absl/status/status.h"
21 | #include "riegeli/base/assert.h"
22 | #include "riegeli/base/chain.h"
23 | #include "riegeli/base/maker.h"
24 | #include "riegeli/base/recycling_pool.h"
25 | #include "riegeli/brotli/brotli_writer.h"
26 | #include "riegeli/bytes/chain_writer.h"
27 | #include "riegeli/bytes/null_writer.h"
28 | #include "riegeli/bytes/writer.h"
29 | #include "riegeli/chunk_encoding/compressor_options.h"
30 |
31 | namespace riegeli::chunk_encoding_internal {
32 |
33 | ABSL_ATTRIBUTE_WEAK std::unique_ptr NewBrotliWriter(
34 | Chain* compressed, const CompressorOptions& compressor_options,
35 | ABSL_ATTRIBUTE_UNUSED const RecyclingPoolOptions& recycling_pool_options) {
36 | switch (compressor_options.brotli_encoder()) {
37 | case BrotliEncoder::kRBrotliOrCBrotli:
38 | case BrotliEncoder::kCBrotli:
39 | return NewCBrotliWriter(compressed, compressor_options);
40 | case BrotliEncoder::kRBrotli: {
41 | std::unique_ptr writer = std::make_unique();
42 | writer->Fail(absl::UnimplementedError("Rust Brotli not available"));
43 | return writer;
44 | }
45 | }
46 | RIEGELI_ASSUME_UNREACHABLE()
47 | << "Unknown Brotli encoder: "
48 | << static_cast(compressor_options.brotli_encoder());
49 | }
50 |
51 | std::unique_ptr NewCBrotliWriter(
52 | Chain* compressed, const CompressorOptions& compressor_options) {
53 | return std::make_unique>>(
54 | riegeli::Maker(compressed),
55 | BrotliWriterBase::Options()
56 | .set_compression_level(compressor_options.compression_level())
57 | .set_window_log(compressor_options.brotli_window_log()));
58 | }
59 |
60 | } // namespace riegeli::chunk_encoding_internal
61 |
--------------------------------------------------------------------------------
/riegeli/snappy/snappy_streams.h:
--------------------------------------------------------------------------------
1 | // Copyright 2019 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_SNAPPY_SNAPPY_STREAMS_H_
16 | #define RIEGELI_SNAPPY_SNAPPY_STREAMS_H_
17 |
18 | #include
19 |
20 | #include "riegeli/base/assert.h"
21 | #include "riegeli/base/types.h"
22 | #include "riegeli/bytes/reader.h"
23 | #include "riegeli/bytes/writer.h"
24 | #include "snappy-sinksource.h"
25 |
26 | namespace riegeli::snappy_internal {
27 |
// Adapts a `Writer` to a `snappy::Sink`.
//
// The `Writer` is held as a raw pointer and is not deleted by this class. The
// overridden methods are only declared here; their definitions are outside
// this header.
class WriterSnappySink : public snappy::Sink {
 public:
  // Stores `dest` after passing it through `RIEGELI_EVAL_ASSERT_NOTNULL()`.
  explicit WriterSnappySink(Writer* dest)
      : dest_(RIEGELI_EVAL_ASSERT_NOTNULL(dest)) {}

  // Not copyable.
  WriterSnappySink(const WriterSnappySink&) = delete;
  WriterSnappySink& operator=(const WriterSnappySink&) = delete;

  // Overrides of the `snappy::Sink` interface.
  void Append(const char* src, size_t length) override;
  char* GetAppendBuffer(size_t length, char* scratch) override;
  void AppendAndTakeOwnership(char* src, size_t length,
                              void (*deleter)(void*, const char*, size_t),
                              void* deleter_arg) override;
  char* GetAppendBufferVariable(size_t min_length, size_t recommended_length,
                                char* scratch, size_t scratch_length,
                                size_t* result_length) override;

 private:
  // The adapted `Writer`.
  Writer* dest_;
};
49 |
// Adapts a `Reader` to a `snappy::Source`.
//
// The `Reader` is held as a raw pointer and is not deleted by this class. The
// overridden methods are only declared here; their definitions are outside
// this header.
class ReaderSnappySource : public snappy::Source {
 public:
  // Stores `src` after passing it through `RIEGELI_EVAL_ASSERT_NOTNULL()`,
  // together with `size`.
  // NOTE(review): `size` is presumably the number of bytes the source exposes
  // through `Available()` - confirm in the implementation.
  explicit ReaderSnappySource(Reader* src, Position size)
      : src_(RIEGELI_EVAL_ASSERT_NOTNULL(src)), size_(size) {}

  // Not copyable.
  ReaderSnappySource(const ReaderSnappySource&) = delete;
  ReaderSnappySource& operator=(const ReaderSnappySource&) = delete;

  // Overrides of the `snappy::Source` interface.
  size_t Available() const override;
  const char* Peek(size_t* length) override;
  void Skip(size_t length) override;

 private:
  // The adapted `Reader`.
  Reader* src_;
  // The `size` passed to the constructor.
  Position size_;
};
67 |
68 | } // namespace riegeli::snappy_internal
69 |
70 | #endif // RIEGELI_SNAPPY_SNAPPY_STREAMS_H_
71 |
--------------------------------------------------------------------------------
/riegeli/base/parallelism.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "riegeli/base/parallelism.h"
16 |
17 | #include
18 |
19 | #include
20 | #include
21 | #include
22 |
23 | #include "absl/base/thread_annotations.h"
24 | #include "absl/functional/any_invocable.h"
25 | #include "absl/synchronization/mutex.h"
26 | #include "absl/time/time.h"
27 | #include "riegeli/base/assert.h"
28 | #include "riegeli/base/global.h"
29 |
30 | namespace riegeli::internal {
31 |
// Marks the pool as exiting and blocks until `num_threads_` drops to zero,
// i.e. until every worker started by `Schedule()` has decremented the counter
// on its way out.
ThreadPool::~ThreadPool() {
  absl::MutexLock lock(mutex_);
  // Workers check `exiting_` after waking up and then leave their loop.
  exiting_ = true;
  // The unary `+` converts the captureless lambda to a function pointer, as
  // required by this `absl::Condition` constructor.
  mutex_.Await(absl::Condition(
      +[](size_t* num_threads) { return *num_threads == 0; }, &num_threads_));
}
38 |
39 | void ThreadPool::Schedule(absl::AnyInvocable task) {
40 | {
41 | absl::MutexLock lock(mutex_);
42 | RIEGELI_ASSERT(!exiting_)
43 | << "Failed precondition of ThreadPool::Schedule(): no new threads may "
44 | "be scheduled while the thread pool is exiting";
45 | tasks_.push_back(std::move(task));
46 | if (num_idle_threads_ >= tasks_.size()) return;
47 | ++num_threads_;
48 | }
49 | std::thread([this] {
50 | for (;;) {
51 | absl::ReleasableMutexLock lock(mutex_);
52 | ++num_idle_threads_;
53 | mutex_.AwaitWithTimeout(
54 | absl::Condition(
55 | +[](ThreadPool* self)
56 | ABSL_EXCLUSIVE_LOCKS_REQUIRED(self->mutex_) {
57 | return !self->tasks_.empty() || self->exiting_;
58 | },
59 | this),
60 | absl::Seconds(1));
61 | --num_idle_threads_;
62 | if (tasks_.empty() || exiting_) {
63 | --num_threads_;
64 | return;
65 | }
66 | absl::AnyInvocable task = std::move(tasks_.front());
67 | tasks_.pop_front();
68 | lock.Release();
69 | std::move(task)();
70 | }
71 | }).detach();
72 | }
73 |
// Returns a reference to the `ThreadPool` obtained from `Global()`, which is
// given a factory that default-constructs the pool.
// NOTE(review): presumably `Global()` constructs the object once and keeps it
// for the lifetime of the process - confirm in riegeli/base/global.h.
ThreadPool& ThreadPool::global() {
  return Global([] { return ThreadPool(); });
}
77 |
78 | } // namespace riegeli::internal
79 |
--------------------------------------------------------------------------------
/riegeli/base/cord_iterator_span.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2025 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "riegeli/base/cord_iterator_span.h"
16 |
17 | #include
18 |
19 | #include
20 | #include
21 |
22 | #include "absl/base/nullability.h"
23 | #include "absl/base/optimization.h"
24 | #include "absl/strings/cord.h"
25 | #include "absl/strings/string_view.h"
26 | #include "riegeli/base/assert.h"
27 | #include "riegeli/base/string_utils.h"
28 |
29 | ABSL_POINTERS_DEFAULT_NONNULL
30 |
31 | namespace riegeli {
32 |
// Copies `length` bytes starting at `src` into `dest`, advancing `src` past
// them, for the case where the bytes span more than the current chunk.
//
// Precondition: the current chunk holds fewer than `length` bytes (otherwise
// `Read()` should be used).
void CordIteratorSpan::ReadSlow(absl::Cord::CharIterator& src, size_t length,
                                char* dest) {
  absl::string_view chunk = absl::Cord::ChunkRemaining(src);
  RIEGELI_ASSERT_LT(chunk.size(), length)
      << "Failed precondition of CordIteratorSpan::ReadSlow(): "
         "enough data available, use Read() instead";
  // Consume whole chunks for as long as the current chunk is shorter than
  // what is still needed.
  do {
    std::memcpy(dest, chunk.data(), chunk.size());
    absl::Cord::Advance(&src, chunk.size());
    dest += chunk.size();
    length -= chunk.size();
    chunk = absl::Cord::ChunkRemaining(src);
  } while (chunk.size() < length);
  // The current chunk now covers the rest; take only the needed prefix.
  std::memcpy(dest, chunk.data(), length);
  absl::Cord::Advance(&src, length);
}
49 |
50 | absl::string_view CordIteratorSpan::ToStringView(std::string& scratch) && {
51 | absl::Cord::CharIterator& iter = *iterator_;
52 | size_t length = length_;
53 | if (length == 0) return absl::string_view();
54 | absl::string_view chunk = absl::Cord::ChunkRemaining(iter);
55 | if (ABSL_PREDICT_TRUE(chunk.size() >= length)) {
56 | absl::Cord::Advance(&iter, length);
57 | return chunk.substr(0, length);
58 | }
59 | scratch.clear();
60 | ResizeStringAmortized(scratch, length);
61 | ReadSlow(iter, length, scratch.data());
62 | return scratch;
63 | }
64 |
65 | void CordIteratorSpan::ToString(std::string& dest) && {
66 | absl::Cord::CharIterator& iter = *iterator_;
67 | size_t length = length_;
68 | dest.clear();
69 | dest.resize(length);
70 | Read(iter, length, dest.data());
71 | }
72 |
73 | } // namespace riegeli
74 |
--------------------------------------------------------------------------------
/riegeli/snappy/BUILD:
--------------------------------------------------------------------------------
1 | load("@rules_cc//cc:defs.bzl", "cc_library")
2 |
3 | package(
4 | default_visibility = ["//visibility:public"],
5 | features = ["header_modules"],
6 | )
7 |
8 | licenses(["notice"])
9 |
10 | cc_library(
11 | name = "snappy_reader",
12 | srcs = ["snappy_reader.cc"],
13 | hdrs = ["snappy_reader.h"],
14 | deps = [
15 | ":snappy_streams",
16 | "//riegeli/base:assert",
17 | "//riegeli/base:chain",
18 | "//riegeli/base:dependency",
19 | "//riegeli/base:initializer",
20 | "//riegeli/base:object",
21 | "//riegeli/base:status",
22 | "//riegeli/base:types",
23 | "//riegeli/bytes:chain_reader",
24 | "//riegeli/bytes:chain_writer",
25 | "//riegeli/bytes:reader",
26 | "//riegeli/bytes:writer",
27 | "//riegeli/varint:varint_reading",
28 | "@com_google_absl//absl/base:core_headers",
29 | "@com_google_absl//absl/status",
30 | "@com_google_absl//absl/strings",
31 | "@snappy",
32 | ],
33 | )
34 |
35 | cc_library(
36 | name = "snappy_writer",
37 | srcs = ["snappy_writer.cc"],
38 | hdrs = ["snappy_writer.h"],
39 | deps = [
40 | ":snappy_streams",
41 | "//riegeli/base:arithmetic",
42 | "//riegeli/base:assert",
43 | "//riegeli/base:buffering",
44 | "//riegeli/base:byte_fill",
45 | "//riegeli/base:chain",
46 | "//riegeli/base:dependency",
47 | "//riegeli/base:external_ref",
48 | "//riegeli/base:initializer",
49 | "//riegeli/base:null_safe_memcpy",
50 | "//riegeli/base:object",
51 | "//riegeli/base:status",
52 | "//riegeli/base:types",
53 | "//riegeli/bytes:chain_reader",
54 | "//riegeli/bytes:reader",
55 | "//riegeli/bytes:writer",
56 | "@com_google_absl//absl/base:core_headers",
57 | "@com_google_absl//absl/status",
58 | "@com_google_absl//absl/strings",
59 | "@com_google_absl//absl/strings:cord",
60 | "@com_google_absl//absl/types:span",
61 | "@snappy",
62 | ],
63 | )
64 |
65 | cc_library(
66 | name = "snappy_streams",
67 | srcs = ["snappy_streams.cc"],
68 | hdrs = ["snappy_streams.h"],
69 | visibility = ["//visibility:private"],
70 | deps = [
71 | "//riegeli/base:arithmetic",
72 | "//riegeli/base:assert",
73 | "//riegeli/base:external_ref",
74 | "//riegeli/base:types",
75 | "//riegeli/bytes:reader",
76 | "//riegeli/bytes:writer",
77 | "@com_google_absl//absl/base:core_headers",
78 | "@com_google_absl//absl/strings:string_view",
79 | "@snappy",
80 | ],
81 | )
82 |
--------------------------------------------------------------------------------
/riegeli/zlib/zlib_error.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2018 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "riegeli/zlib/zlib_error.h"
16 |
17 | #include
18 |
19 | #include "absl/status/status.h"
20 | #include "absl/strings/str_cat.h"
21 | #include "absl/strings/string_view.h"
22 | #include "riegeli/base/assert.h"
23 | #include "zlib.h"
24 |
25 | namespace riegeli::zlib_internal {
26 |
27 | absl::Status ZlibErrorToStatus(absl::string_view operation, int zlib_code,
28 | const char* details) {
29 | absl::StatusCode code;
30 | switch (zlib_code) {
31 | case Z_OK:
32 | return absl::OkStatus();
33 | case Z_NEED_DICT:
34 | case Z_DATA_ERROR:
35 | code = absl::StatusCode::kInvalidArgument;
36 | break;
37 | case Z_MEM_ERROR:
38 | code = absl::StatusCode::kResourceExhausted;
39 | break;
40 | default:
41 | // Should not happen.
42 | code = absl::StatusCode::kInternal;
43 | break;
44 | }
45 | std::string message = absl::StrCat(operation, " failed");
46 | if (details == nullptr) {
47 | switch (zlib_code) {
48 | case Z_OK:
49 | RIEGELI_ASSUME_UNREACHABLE() << "Handled before switch";
50 | case Z_STREAM_END:
51 | details = "stream end";
52 | break;
53 | case Z_NEED_DICT:
54 | details = "need dictionary";
55 | break;
56 | case Z_ERRNO:
57 | details = "file error";
58 | break;
59 | case Z_STREAM_ERROR:
60 | details = "stream error";
61 | break;
62 | case Z_DATA_ERROR:
63 | details = "data error";
64 | break;
65 | case Z_MEM_ERROR:
66 | details = "insufficient memory";
67 | break;
68 | case Z_BUF_ERROR:
69 | details = "buffer error";
70 | break;
71 | case Z_VERSION_ERROR:
72 | details = "incompatible version";
73 | break;
74 | default:
75 | absl::StrAppend(&message, ": unknown zlib error code: ", zlib_code);
76 | break;
77 | }
78 | }
79 | if (details != nullptr) absl::StrAppend(&message, ": ", details);
80 | return absl::Status(code, message);
81 | }
82 |
83 | } // namespace riegeli::zlib_internal
84 |
--------------------------------------------------------------------------------
/riegeli/base/type_id.h:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_BASE_TYPE_ID_H_
16 | #define RIEGELI_BASE_TYPE_ID_H_
17 |
18 | #include
19 | #include
20 | #include
21 |
22 | #include "absl/base/nullability.h"
23 | #include "riegeli/base/compare.h"
24 |
25 | ABSL_POINTERS_DEFAULT_NONNULL
26 |
27 | namespace riegeli {
28 |
29 | // `TypeId::For()` is a token which is equal to `TypeId::For()` whenever
30 | // `A` and `B` are the same type.
31 | //
32 | // `TypeId()` is another value not equal to any other.
33 | class ABSL_NULLABILITY_COMPATIBLE TypeId : public WithCompare {
34 | public:
35 | constexpr TypeId() = default;
36 | /*implicit*/ constexpr TypeId(std::nullptr_t) noexcept {}
37 |
38 | TypeId(const TypeId& that) = default;
39 | TypeId& operator=(const TypeId& that) = default;
40 |
41 | template
42 | static constexpr TypeId For();
43 |
44 | friend constexpr bool operator==(TypeId a, TypeId b) {
45 | return a.ptr_ == b.ptr_;
46 | }
47 | friend StrongOrdering RIEGELI_COMPARE(TypeId a, TypeId b) {
48 | if (std::less<>()(a.ptr_, b.ptr_)) return StrongOrdering::less;
49 | if (std::greater<>()(a.ptr_, b.ptr_)) return StrongOrdering::greater;
50 | return StrongOrdering::equal;
51 | }
52 |
53 | template
54 | friend HashState AbslHashValue(HashState hash_state, TypeId self) {
55 | return HashState::combine(std::move(hash_state), self.ptr_);
56 | }
57 |
58 | private:
59 | using pointer = void*; // For `ABSL_NULLABILITY_COMPATIBLE`.
60 |
61 | template
62 | struct TypeIdToken;
63 |
64 | explicit constexpr TypeId(const void* ptr) : ptr_(ptr) {}
65 |
66 | const void* absl_nullable ptr_ = nullptr;
67 | };
68 |
69 | // Implementation details follow.
70 |
71 | template
72 | struct TypeId::TypeIdToken {
73 | static const char token;
74 | };
75 |
76 | template
77 | const char TypeId::TypeIdToken::token = '\0';
78 |
79 | template
80 | constexpr TypeId TypeId::For() {
81 | return TypeId(&TypeIdToken::token);
82 | }
83 |
84 | } // namespace riegeli
85 |
86 | #endif // RIEGELI_BASE_TYPE_ID_H_
87 |
--------------------------------------------------------------------------------
/riegeli/zlib/BUILD:
--------------------------------------------------------------------------------
1 | load("@rules_cc//cc:defs.bzl", "cc_library")
2 |
3 | package(
4 | default_visibility = ["//visibility:public"],
5 | features = ["header_modules"],
6 | )
7 |
8 | licenses(["notice"])
9 |
10 | cc_library(
11 | name = "zlib_reader",
12 | srcs = [
13 | "zlib_dictionary.h",
14 | "zlib_reader.cc",
15 | ],
16 | hdrs = ["zlib_reader.h"],
17 | deps = [
18 | ":zlib_error",
19 | "//riegeli/base:arithmetic",
20 | "//riegeli/base:assert",
21 | "//riegeli/base:bytes_ref",
22 | "//riegeli/base:dependency",
23 | "//riegeli/base:initializer",
24 | "//riegeli/base:object",
25 | "//riegeli/base:recycling_pool",
26 | "//riegeli/base:shared_ptr",
27 | "//riegeli/base:status",
28 | "//riegeli/base:types",
29 | "//riegeli/bytes:buffer_options",
30 | "//riegeli/bytes:buffered_reader",
31 | "//riegeli/bytes:reader",
32 | "//riegeli/endian:endian_reading",
33 | "@com_google_absl//absl/base:core_headers",
34 | "@com_google_absl//absl/status",
35 | "@com_google_absl//absl/strings",
36 | "@com_google_absl//absl/strings:string_view",
37 | "@zlib",
38 | ],
39 | )
40 |
41 | cc_library(
42 | name = "zlib_writer",
43 | srcs = [
44 | "zlib_dictionary.h",
45 | "zlib_writer.cc",
46 | ],
47 | hdrs = ["zlib_writer.h"],
48 | deps = [
49 | ":zlib_error",
50 | ":zlib_reader",
51 | "//riegeli/base:arithmetic",
52 | "//riegeli/base:assert",
53 | "//riegeli/base:bytes_ref",
54 | "//riegeli/base:compare",
55 | "//riegeli/base:dependency",
56 | "//riegeli/base:initializer",
57 | "//riegeli/base:object",
58 | "//riegeli/base:recycling_pool",
59 | "//riegeli/base:shared_ptr",
60 | "//riegeli/base:status",
61 | "//riegeli/base:types",
62 | "//riegeli/bytes:buffer_options",
63 | "//riegeli/bytes:buffered_writer",
64 | "//riegeli/bytes:reader",
65 | "//riegeli/bytes:writer",
66 | "@com_google_absl//absl/base:core_headers",
67 | "@com_google_absl//absl/status",
68 | "@com_google_absl//absl/strings",
69 | "@com_google_absl//absl/strings:string_view",
70 | "@zlib",
71 | ],
72 | )
73 |
74 | cc_library(
75 | name = "zlib_error",
76 | srcs = ["zlib_error.cc"],
77 | hdrs = ["zlib_error.h"],
78 | visibility = ["//visibility:private"],
79 | deps = [
80 | "//riegeli/base:assert",
81 | "@com_google_absl//absl/base:core_headers",
82 | "@com_google_absl//absl/status",
83 | "@com_google_absl//absl/strings",
84 | "@com_google_absl//absl/strings:string_view",
85 | "@zlib",
86 | ],
87 | )
88 |
--------------------------------------------------------------------------------
/riegeli/bytes/array_backward_writer.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2018 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "riegeli/bytes/array_backward_writer.h"
16 |
17 | #include
18 |
19 | #include "absl/base/optimization.h"
20 | #include "absl/strings/string_view.h"
21 | #include "absl/types/span.h"
22 | #include "riegeli/base/arithmetic.h"
23 | #include "riegeli/base/assert.h"
24 | #include "riegeli/base/types.h"
25 |
26 | namespace riegeli {
27 |
// Handles a push request when no space is available and scratch is not in use
// (both are asserted as preconditions). `recommended_length` is not consulted
// here; the result is whatever `ForcePushUsingScratch()` returns.
28 | bool ArrayBackwardWriterBase::PushBehindScratch(size_t recommended_length) {
29 | RIEGELI_ASSERT_EQ(available(), 0u)
30 | << "Failed precondition of PushableBackwardWriter::PushBehindScratch(): "
31 | "some space available, use Push() instead";
32 | RIEGELI_ASSERT(!scratch_used())
33 | << "Failed precondition of PushableBackwardWriter::PushBehindScratch(): "
34 | "scratch used";
35 | return ForcePushUsingScratch();
36 | }
37 |
// Handles a write of `src` that does not fit in the available space (asserted
// as a precondition, together with scratch not being in use). This
// implementation never accepts the data: it always returns the result of
// `FailOverflow()`.
38 | bool ArrayBackwardWriterBase::WriteBehindScratch(absl::string_view src) {
39 | RIEGELI_ASSERT_LT(available(), src.size())
40 | << "Failed precondition of "
41 | "PushableBackwardWriter::WriteBehindScratch(string_view): "
42 | "enough space available, use Write(string_view) instead";
43 | RIEGELI_ASSERT(!scratch_used())
44 | << "Failed precondition of "
45 | "PushableBackwardWriter::WriteBehindScratch(string_view): "
46 | "scratch used";
47 | return FailOverflow();
48 | }
49 |
// Publishes the data written so far. Returns `false` if the writer is not
// `ok()`; otherwise sets `written_` to the span that starts at `cursor()` and
// has length `start_to_cursor()`, and returns `true`. `flush_type` is unused.
50 | bool ArrayBackwardWriterBase::FlushBehindScratch(FlushType flush_type) {
51 | RIEGELI_ASSERT(!scratch_used())
52 | << "Failed precondition of PushableBackwardWriter::FlushBehindScratch(): "
53 | "scratch used";
54 | if (ABSL_PREDICT_FALSE(!ok())) return false;
55 | written_ = absl::MakeSpan(cursor(), start_to_cursor());
56 | return true;
57 | }
58 |
// Truncates the written data to `new_size` bytes by moving the cursor to
// `start() - new_size`. Returns `false` without moving the cursor if the
// writer is not `ok()` or if `new_size` exceeds `start_to_cursor()`.
// Scratch must not be in use (asserted).
59 | bool ArrayBackwardWriterBase::TruncateBehindScratch(Position new_size) {
60 | RIEGELI_ASSERT(!scratch_used())
61 | << "Failed precondition of "
62 | "PushableBackwardWriter::TruncateBehindScratch(): "
63 | "scratch used";
64 | if (ABSL_PREDICT_FALSE(!ok())) return false;
65 | if (ABSL_PREDICT_FALSE(new_size > start_to_cursor())) return false;
// `IntCast` needs an explicit target type; `size_t` is safe here because
// `new_size` was just checked against `start_to_cursor()`.
66 | set_cursor(start() - IntCast<size_t>(new_size));
67 | return true;
68 | }
69 |
70 | } // namespace riegeli
71 |
--------------------------------------------------------------------------------
/riegeli/records/tools/tfrecord_recognizer.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2018 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "riegeli/records/tools/tfrecord_recognizer.h"
16 |
17 | #include
18 |
19 | #include "absl/base/optimization.h"
20 | #include "absl/status/status.h"
21 | #include "riegeli/base/any.h"
22 | #include "riegeli/base/maker.h"
23 | #include "riegeli/bytes/reader.h"
24 | #include "riegeli/endian/endian_reading.h"
25 | #include "riegeli/zlib/zlib_reader.h"
26 | #include "tensorflow/core/lib/hash/crc32c.h"
27 | #include "tensorflow/core/lib/io/record_reader.h"
28 |
29 | namespace riegeli {
30 |
// Checks whether the data behind `byte_reader_` looks like a TFRecord file and
// records the detected compression in `record_reader_options`.
//
// Returns `true` if the masked CRC32C stored after the first 8-byte length
// field matches the CRC32C computed over that field. Returns `false` with
// `ok()` still `true` for an empty file. Returns `false` through `Fail()` if
// reading fails, fewer than 12 bytes are available, or the CRC does not match.
// `record_reader_options` is updated before the header is validated, so it is
// modified even when the check fails afterwards.
31 | bool TFRecordRecognizer::CheckFileFormat(
32 | tensorflow::io::RecordReaderOptions& record_reader_options) {
33 | if (ABSL_PREDICT_FALSE(!ok())) return false;
34 | if (ABSL_PREDICT_FALSE(!byte_reader_->Pull())) {
35 | if (ABSL_PREDICT_FALSE(!byte_reader_->ok())) {
36 | return Fail(byte_reader_->status());
37 | }
38 | // Empty file: return `false` but leave `ok()` as `true`. This mimics the
39 | // behavior of reading functions at end of file.
40 | return false;
41 | }
42 |
// Holds either the original reader or a zlib-decompressing wrapper around it.
43 | Any<Reader*>::Inlining<ZlibReader<>> reader;
44 | if (RecognizeZlib(*byte_reader_)) {
45 | record_reader_options.compression_type =
46 | tensorflow::io::RecordReaderOptions::ZLIB_COMPRESSION;
47 | record_reader_options.zlib_options =
48 | tensorflow::io::ZlibCompressionOptions::DEFAULT();
49 | record_reader_options.zlib_options.window_bits = 32;
50 | reader = riegeli::Maker<ZlibReader<>>(byte_reader_);
51 | } else {
52 | record_reader_options.compression_type =
53 | tensorflow::io::RecordReaderOptions::NONE;
54 | reader = byte_reader_;
55 | }
56 |
// The header checked here is a `uint64_t` length followed by a `uint32_t`
// masked CRC32C of that length.
57 | if (ABSL_PREDICT_FALSE(!reader->Pull(sizeof(uint64_t) + sizeof(uint32_t)))) {
58 | if (ABSL_PREDICT_FALSE(!reader->ok())) return Fail(reader->status());
59 | return Fail(absl::InvalidArgumentError("Truncated TFRecord file"));
60 | }
61 | if (tensorflow::crc32c::Unmask(
62 | ReadLittleEndian<uint32_t>(reader->cursor() + sizeof(uint64_t))) !=
63 | tensorflow::crc32c::Value(reader->cursor(), sizeof(uint64_t))) {
64 | return Fail(absl::InvalidArgumentError("Corrupted TFRecord file"));
65 | }
66 | return true;
67 | }
68 |
69 | } // namespace riegeli
70 |
--------------------------------------------------------------------------------
/riegeli/bytes/std_io.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2020 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "riegeli/bytes/std_io.h"
16 |
17 | #include
18 |
19 | #include "riegeli/base/assert.h"
20 | #include "riegeli/base/global.h"
21 | #include "riegeli/base/sized_shared_buffer.h"
22 | #include "riegeli/bytes/fd_reader.h"
23 | #include "riegeli/bytes/fd_writer.h"
24 |
25 | namespace riegeli {
26 |
27 | namespace {
28 |
// File descriptors used by `StdIn`, `StdOut`, and `StdErr`. They start as
// 0, 1, and 2 and are replaced temporarily by the `InjectedStd*Fd` classes.
29 | int std_in_fd = 0;
30 | int std_out_fd = 1;
31 | int std_err_fd = 2;
32 |
// Returns the buffer in which `StdIn::Done()` stores data that was read but
// not consumed, so that a later `StdIn` can restore it.
// NOTE(review): relies on `Global()` returning the same object on every call;
// confirm against riegeli/base/global.h.
33 | SizedSharedBuffer& StdInPending() {
34 | return Global([] { return SizedSharedBuffer(); });
35 | }
36 |
37 | } // namespace
38 |
// Opens a reader on the current standard input fd (`std_in_fd`) and, if a
// previous `StdIn` left unread data in the pending buffer, restores it.
39 | StdIn::StdIn(Options options) : FdReader(std_in_fd, std::move(options)) {
40 | SizedSharedBuffer& pending = StdInPending();
41 | if (!pending.empty()) RestoreBuffer(std::move(pending));
42 | }
43 |
// Re-initializes this reader on the current `std_in_fd`, then restores any
// pending unread data, mirroring the constructor.
44 | void StdIn::Reset(Options options) {
45 | FdReader::Reset(std_in_fd, std::move(options));
46 | SizedSharedBuffer& pending = StdInPending();
47 | if (!pending.empty()) RestoreBuffer(std::move(pending));
48 | }
49 |
// Finishes reading. The pending buffer must be empty at this point (asserted);
// a non-empty one indicates that several `StdIn` objects are in use at once.
// If buffered data remains and the reader does not support random access, the
// buffer is saved to the pending buffer before `FdReader::Done()` runs.
50 | void StdIn::Done() {
51 | RIEGELI_ASSERT(StdInPending().empty())
52 | << "Multiple instances of StdIn in use at a time";
53 | if (available() > 0 && !SupportsRandomAccess()) StdInPending() = SaveBuffer();
54 | FdReader::Done();
55 | }
56 |
// Opens a writer on the current standard output fd (`std_out_fd`).
57 | StdOut::StdOut(Options options) : FdWriter(std_out_fd, std::move(options)) {}
58 |
// Re-initializes this writer on the current `std_out_fd`.
59 | void StdOut::Reset(Options options) {
60 | FdWriter::Reset(std_out_fd, std::move(options));
61 | }
62 |
// Opens a writer on the current standard error fd (`std_err_fd`).
63 | StdErr::StdErr(Options options) : FdWriter(std_err_fd, std::move(options)) {}
64 |
// Re-initializes this writer on the current `std_err_fd`.
65 | void StdErr::Reset(Options options) {
66 | FdWriter::Reset(std_err_fd, std::move(options));
67 | }
68 |
// Makes `fd` the standard input fd used by `StdIn`. The previous fd and the
// previous pending buffer are kept in this object so the destructor can put
// them back.
69 | InjectedStdInFd::InjectedStdInFd(int fd)
70 | : old_fd_(std::exchange(std_in_fd, fd)),
71 | old_pending_(std::move(StdInPending())) {}
72 |
// Restores the standard input fd and the pending buffer saved by the
// constructor.
73 | InjectedStdInFd::~InjectedStdInFd() {
74 | std_in_fd = old_fd_;
75 | StdInPending() = std::move(old_pending_);
76 | }
77 |
// Makes `fd` the standard output fd used by `StdOut`, remembering the old one.
78 | InjectedStdOutFd::InjectedStdOutFd(int fd)
79 | : old_fd_(std::exchange(std_out_fd, fd)) {}
80 |
// Restores the standard output fd saved by the constructor.
81 | InjectedStdOutFd::~InjectedStdOutFd() { std_out_fd = old_fd_; }
82 |
// Makes `fd` the standard error fd used by `StdErr`, remembering the old one.
83 | InjectedStdErrFd::InjectedStdErrFd(int fd)
84 | : old_fd_(std::exchange(std_err_fd, fd)) {}
85 |
// Restores the standard error fd saved by the constructor.
86 | InjectedStdErrFd::~InjectedStdErrFd() { std_err_fd = old_fd_; }
87 |
88 | } // namespace riegeli
89 |
--------------------------------------------------------------------------------
/riegeli/base/initializer_internal.h:
--------------------------------------------------------------------------------
1 | // Copyright 2024 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef RIEGELI_BASE_INITIALIZER_INTERNAL_H_
16 | #define RIEGELI_BASE_INITIALIZER_INTERNAL_H_
17 |
18 | #include
19 |
20 | #include
21 | #include
22 |
23 | #include "absl/base/casts.h"
24 | #include "absl/base/nullability.h"
25 |
26 | ABSL_POINTERS_DEFAULT_NONNULL
27 |
28 | namespace riegeli::initializer_internal {
29 |
30 | // `CanBindReference<T&&, Arg&&>::value` is `true` if `Arg&&` can be implicitly
31 | // converted to `T&&` without creating a temporary.
32 | //
33 | // Due to not all compilers implementing http://wg21.link/cwg2352 (converting
34 | // `T*&` to `const T* const&` could have bound the result to a temporary),
35 | // this covers also the case when the corresponding pointers can be converted.
36 | // `BindReference()` should be used for the actual conversion.
37 |
38 | template <typename T, typename Arg>
39 | struct CanBindReference : std::false_type {};
40 |
41 | template <typename T, typename Arg>
42 | struct CanBindReference<T&, Arg&> : std::is_convertible<Arg*, T*> {};
43 |
44 | template <typename T, typename Arg>
45 | struct CanBindReference<T&, Arg&&> : std::false_type {};
46 |
47 | template <typename T, typename Arg>
48 | struct CanBindReference<const T&, Arg&&> : std::is_convertible<Arg*, const T*> {
49 | };
50 |
51 | template <typename T, typename Arg>
52 | struct CanBindReference<T&&, Arg&> : std::false_type {};
53 |
54 | template <typename T, typename Arg>
55 | struct CanBindReference<T&&, Arg&&> : std::is_convertible<Arg*, T*> {};
56 |
57 | // `BindReference<T>(arg)` returns `arg` implicitly converted to `T&&`.
58 | //
59 | // Due to not all compilers implementing http://wg21.link/cwg2352 (converting
60 | // `T*&` to `const T* const&` could have bound the result to a temporary),
61 | // this is not implemented as a simple implicit conversion, but by converting
62 | // the reference to a pointer, implicitly converting the pointer, and
63 | // dereferencing back.
64 | template <typename T, typename Arg,
65 | std::enable_if_t<CanBindReference<T&&, Arg&&>::value, int> = 0>
66 | inline T&& BindReference(Arg&& arg) {
67 | return std::forward<T>(
68 | *absl::implicit_cast<std::remove_reference_t<T>*>(&arg));
69 | }
70 |
71 | } // namespace riegeli::initializer_internal
72 |
73 | #endif // RIEGELI_BASE_INITIALIZER_INTERNAL_H_
74 |
--------------------------------------------------------------------------------
/riegeli/lz4/BUILD:
--------------------------------------------------------------------------------
1 | load("@rules_cc//cc:defs.bzl", "cc_library")
2 |
3 | package(
4 | default_visibility = ["//visibility:public"],
5 | features = ["header_modules"],
6 | )
7 |
8 | licenses(["notice"])
9 |
10 | cc_library(
11 | name = "lz4_reader",
12 | srcs = ["lz4_reader.cc"],
13 | hdrs = ["lz4_reader.h"],
14 | # lz4_reader.cc has #define before #include to influence what the included
15 | # files provide.
16 | features = ["-use_header_modules"],
17 | deps = [
18 | ":lz4_dictionary",
19 | "//riegeli/base:arithmetic",
20 | "//riegeli/base:assert",
21 | "//riegeli/base:dependency",
22 | "//riegeli/base:initializer",
23 | "//riegeli/base:object",
24 | "//riegeli/base:recycling_pool",
25 | "//riegeli/base:status",
26 | "//riegeli/base:types",
27 | "//riegeli/bytes:buffer_options",
28 | "//riegeli/bytes:buffered_reader",
29 | "//riegeli/bytes:reader",
30 | "@com_google_absl//absl/base:core_headers",
31 | "@com_google_absl//absl/status",
32 | "@com_google_absl//absl/strings",
33 | "@com_google_absl//absl/strings:string_view",
34 | "@lz4//:lz4_frame",
35 | ],
36 | )
37 |
# LZ4 writer library. Besides `:lz4_dictionary` it depends on `:lz4_reader`,
# and on both the `@lz4` and `@lz4//:lz4_frame` external targets.
38 | cc_library(
39 | name = "lz4_writer",
40 | srcs = ["lz4_writer.cc"],
41 | hdrs = ["lz4_writer.h"],
42 | # lz4_writer.cc has #define before #include to influence what the included
43 | # files provide.
44 | features = ["-use_header_modules"],
45 | deps = [
46 | ":lz4_dictionary",
47 | ":lz4_reader",
48 | "//riegeli/base:arithmetic",
49 | "//riegeli/base:assert",
50 | "//riegeli/base:dependency",
51 | "//riegeli/base:initializer",
52 | "//riegeli/base:object",
53 | "//riegeli/base:recycling_pool",
54 | "//riegeli/base:status",
55 | "//riegeli/base:types",
56 | "//riegeli/bytes:buffer_options",
57 | "//riegeli/bytes:buffered_writer",
58 | "//riegeli/bytes:reader",
59 | "//riegeli/bytes:writer",
60 | "@com_google_absl//absl/base:core_headers",
61 | "@com_google_absl//absl/status",
62 | "@com_google_absl//absl/strings",
63 | "@com_google_absl//absl/strings:string_view",
64 | "@lz4",
65 | "@lz4//:lz4_frame",
66 | ],
67 | )
68 |
# LZ4 dictionary library shared by `:lz4_reader` and `:lz4_writer`. Private
# visibility keeps it from being used outside this package.
69 | cc_library(
70 | name = "lz4_dictionary",
71 | srcs = ["lz4_dictionary.cc"],
72 | hdrs = ["lz4_dictionary.h"],
73 | # lz4_dictionary.cc has #define before #include to influence what the
74 | # included files provide.
75 | features = ["-use_header_modules"],
76 | visibility = ["//visibility:private"],
77 | deps = [
78 | "//riegeli/base:bytes_ref",
79 | "//riegeli/base:initializer",
80 | "//riegeli/base:shared_ptr",
81 | "@com_google_absl//absl/base",
82 | "@com_google_absl//absl/base:core_headers",
83 | "@com_google_absl//absl/strings:string_view",
84 | "@lz4//:lz4_frame",
85 | ],
86 | )
87 |
--------------------------------------------------------------------------------
/riegeli/bzip2/bzip2_error.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include "riegeli/bzip2/bzip2_error.h"
16 |
17 | #include
18 |
19 | #include "absl/status/status.h"
20 | #include "absl/strings/str_cat.h"
21 | #include "absl/strings/string_view.h"
22 | #include "bzlib.h"
23 | #include "riegeli/base/assert.h"
24 |
25 | namespace riegeli::bzip2_internal {
26 |
// Converts a bzlib return code into an `absl::Status`.
//
// `operation` names the bzlib call that produced `bzlib_code`; it becomes the
// prefix of the message ("<operation> failed"). The four success codes yield
// `absl::OkStatus()`. Data errors map to `kInvalidArgument`, memory errors to
// `kResourceExhausted`, and every other code (including `BZ_STREAM_END`) to
// `kInternal`. A textual description of the code is appended to the message;
// for unrecognized codes the numeric value is appended instead.
27 | absl::Status Bzip2ErrorToStatus(absl::string_view operation, int bzlib_code) {
28 | absl::StatusCode code;
// First pass: choose the status code, returning early for success codes.
29 | switch (bzlib_code) {
30 | case BZ_OK:
31 | case BZ_RUN_OK:
32 | case BZ_FLUSH_OK:
33 | case BZ_FINISH_OK:
34 | return absl::OkStatus();
35 | case BZ_DATA_ERROR:
36 | case BZ_DATA_ERROR_MAGIC:
37 | code = absl::StatusCode::kInvalidArgument;
38 | break;
39 | case BZ_MEM_ERROR:
40 | code = absl::StatusCode::kResourceExhausted;
41 | break;
42 | default:
43 | // Should not happen.
44 | code = absl::StatusCode::kInternal;
45 | break;
46 | }
47 | std::string message = absl::StrCat(operation, " failed");
48 | absl::string_view details;
// Second pass: choose the human-readable detail text for the message.
49 | switch (bzlib_code) {
50 | case BZ_OK:
51 | case BZ_RUN_OK:
52 | case BZ_FLUSH_OK:
53 | case BZ_FINISH_OK:
54 | RIEGELI_ASSUME_UNREACHABLE() << "Handled before switch";
55 | case BZ_STREAM_END:
56 | details = "stream end";
57 | break;
58 | case BZ_SEQUENCE_ERROR:
59 | details = "sequence error";
60 | break;
61 | case BZ_PARAM_ERROR:
62 | details = "parameter error";
63 | break;
64 | case BZ_MEM_ERROR:
65 | details = "memory error";
66 | break;
67 | case BZ_DATA_ERROR:
68 | details = "data error";
69 | break;
70 | case BZ_DATA_ERROR_MAGIC:
71 | details = "data error (magic)";
72 | break;
73 | case BZ_IO_ERROR:
74 | details = "I/O error";
75 | break;
76 | case BZ_UNEXPECTED_EOF:
77 | details = "unexpected EOF";
78 | break;
79 | case BZ_OUTBUFF_FULL:
80 | details = "output buffer full";
81 | break;
82 | case BZ_CONFIG_ERROR:
83 | details = "config error";
84 | break;
85 | default:
86 | absl::StrAppend(&message, ": unknown bzlib error code: ", bzlib_code);
87 | break;
88 | }
// `details` stays empty only in the `default` case, which already appended.
89 | if (!details.empty()) absl::StrAppend(&message, ": ", details);
90 | return absl::Status(code, message);
91 | }
92 |
93 | } // namespace riegeli::bzip2_internal
94 |
--------------------------------------------------------------------------------
/riegeli/lines/BUILD:
--------------------------------------------------------------------------------
1 | load("@rules_cc//cc:defs.bzl", "cc_library")
2 |
3 | package(
4 | default_visibility = ["//visibility:public"],
5 | features = ["header_modules"],
6 | )
7 |
8 | licenses(["notice"])
9 |
# Header-only target (no `srcs`) exposing `newline.h`.
10 | cc_library(
11 | name = "newline",
12 | hdrs = ["newline.h"],
13 | deps = ["@com_google_absl//absl/strings:string_view"],
14 | )
15 |
# Line reading library (`line_reading.cc` / `line_reading.h`), built on
# `:newline` and `//riegeli/bytes:reader`.
16 | cc_library(
17 | name = "line_reading",
18 | srcs = ["line_reading.cc"],
19 | hdrs = ["line_reading.h"],
20 | deps = [
21 | ":newline",
22 | "//riegeli/base:arithmetic",
23 | "//riegeli/base:assert",
24 | "//riegeli/base:chain",
25 | "//riegeli/bytes:reader",
26 | "@com_google_absl//absl/base:core_headers",
27 | "@com_google_absl//absl/status",
28 | "@com_google_absl//absl/strings",
29 | "@com_google_absl//absl/strings:cord",
30 | "@com_google_absl//absl/strings:string_view",
31 | ],
32 | )
33 |
# Header-only target (no `srcs`) exposing `line_writing.h`, built on `:newline`
# and `//riegeli/bytes:writer`.
34 | cc_library(
35 | name = "line_writing",
36 | hdrs = ["line_writing.h"],
37 | deps = [
38 | ":newline",
39 | "//riegeli/base:assert",
40 | "//riegeli/base:type_traits",
41 | "//riegeli/bytes:stringify",
42 | "//riegeli/bytes:writer",
43 | "@com_google_absl//absl/base:core_headers",
44 | ],
45 | )
46 |
# Text reader library (`text_reader.cc` / `text_reader.h`); depends on
# `:newline` and on the buffered and prefix-limiting readers from
# `//riegeli/bytes`.
47 | cc_library(
48 | name = "text_reader",
49 | srcs = ["text_reader.cc"],
50 | hdrs = ["text_reader.h"],
51 | deps = [
52 | ":newline",
53 | "//riegeli/base:any",
54 | "//riegeli/base:arithmetic",
55 | "//riegeli/base:assert",
56 | "//riegeli/base:dependency",
57 | "//riegeli/base:initializer",
58 | "//riegeli/base:object",
59 | "//riegeli/base:status",
60 | "//riegeli/base:types",
61 | "//riegeli/bytes:buffer_options",
62 | "//riegeli/bytes:buffered_reader",
63 | "//riegeli/bytes:prefix_limiting_reader",
64 | "//riegeli/bytes:reader",
65 | "@com_google_absl//absl/base:core_headers",
66 | "@com_google_absl//absl/status",
67 | "@com_google_absl//absl/strings",
68 | ],
69 | )
70 |
# Text writer library (`text_writer.cc` / `text_writer.h`); depends on
# `:line_writing`, `:newline`, and on the buffered and prefix-limiting writers
# from `//riegeli/bytes`.
71 | cc_library(
72 | name = "text_writer",
73 | srcs = ["text_writer.cc"],
74 | hdrs = ["text_writer.h"],
75 | deps = [
76 | ":line_writing",
77 | ":newline",
78 | "//riegeli/base:any",
79 | "//riegeli/base:arithmetic",
80 | "//riegeli/base:assert",
81 | "//riegeli/base:dependency",
82 | "//riegeli/base:initializer",
83 | "//riegeli/base:object",
84 | "//riegeli/base:status",
85 | "//riegeli/base:types",
86 | "//riegeli/bytes:buffer_options",
87 | "//riegeli/bytes:buffered_writer",
88 | "//riegeli/bytes:prefix_limiting_writer",
89 | "//riegeli/bytes:writer",
90 | "@com_google_absl//absl/base:core_headers",
91 | "@com_google_absl//absl/status",
92 | "@com_google_absl//absl/strings",
93 | "@com_google_absl//absl/strings:string_view",
94 | ],
95 | )
96 |
--------------------------------------------------------------------------------
/riegeli/gcs/BUILD:
--------------------------------------------------------------------------------
1 | load("@rules_cc//cc:defs.bzl", "cc_library")
2 |
3 | package(
4 | default_visibility = ["//visibility:public"],
5 | features = ["header_modules"],
6 | )
7 |
8 | licenses(["notice"])
9 |
# `gcs_object` library (`gcs_object.cc` / `gcs_object.h`). It has no dependency
# on `@google_cloud_cpp`, unlike `:gcs_reader` and `:gcs_writer`.
10 | cc_library(
11 | name = "gcs_object",
12 | srcs = ["gcs_object.cc"],
13 | hdrs = ["gcs_object.h"],
14 | deps = [
15 | "//riegeli/base:arithmetic",
16 | "//riegeli/base:compare",
17 | "//riegeli/base:debug",
18 | "//riegeli/base:global",
19 | "//riegeli/base:reset",
20 | "//riegeli/base:string_ref",
21 | "//riegeli/bytes:ostream_writer",
22 | "//riegeli/bytes:string_writer",
23 | "//riegeli/bytes:stringify",
24 | "//riegeli/bytes:stringify_writer",
25 | "//riegeli/bytes:writer",
26 | "@com_google_absl//absl/base:core_headers",
27 | "@com_google_absl//absl/status",
28 | "@com_google_absl//absl/strings",
29 | "@com_google_absl//absl/strings:string_view",
30 | ],
31 | )
32 |
# GCS reader library. `gcs_internal.h` is listed under `srcs` rather than
# `hdrs`; the same header is also listed by `:gcs_writer`. Depends on
# `:gcs_object`, `//riegeli/bytes:istream_reader`, and the
# `@google_cloud_cpp` `common` and `storage` targets.
33 | cc_library(
34 | name = "gcs_reader",
35 | srcs = [
36 | "gcs_internal.h",
37 | "gcs_reader.cc",
38 | ],
39 | hdrs = ["gcs_reader.h"],
40 | deps = [
41 | ":gcs_object",
42 | "//riegeli/base:arithmetic",
43 | "//riegeli/base:assert",
44 | "//riegeli/base:initializer",
45 | "//riegeli/base:object",
46 | "//riegeli/base:reset",
47 | "//riegeli/base:status",
48 | "//riegeli/base:type_traits",
49 | "//riegeli/base:types",
50 | "//riegeli/bytes:buffer_options",
51 | "//riegeli/bytes:istream_reader",
52 | "//riegeli/bytes:reader",
53 | "@com_google_absl//absl/base:core_headers",
54 | "@com_google_absl//absl/status",
55 | "@com_google_absl//absl/strings",
56 | "@com_google_absl//absl/strings:string_view",
57 | "@google_cloud_cpp//:common",
58 | "@google_cloud_cpp//:storage",
59 | ],
60 | )
61 |
# GCS writer library. `gcs_internal.h` is listed under `srcs` rather than
# `hdrs`. Depends on `:gcs_object`, `:gcs_reader`,
# `//riegeli/bytes:ostream_writer`, and the `@google_cloud_cpp` `common` and
# `storage` targets.
62 | cc_library(
63 | name = "gcs_writer",
64 | srcs = [
65 | "gcs_internal.h",
66 | "gcs_writer.cc",
67 | ],
68 | hdrs = ["gcs_writer.h"],
69 | deps = [
70 | ":gcs_object",
71 | ":gcs_reader",
72 | "//riegeli/base:arithmetic",
73 | "//riegeli/base:assert",
74 | "//riegeli/base:initializer",
75 | "//riegeli/base:object",
76 | "//riegeli/base:reset",
77 | "//riegeli/base:status",
78 | "//riegeli/base:string_ref",
79 | "//riegeli/base:type_traits",
80 | "//riegeli/base:types",
81 | "//riegeli/bytes:buffer_options",
82 | "//riegeli/bytes:ostream_writer",
83 | "//riegeli/bytes:reader",
84 | "//riegeli/bytes:writer",
85 | "@com_google_absl//absl/base:core_headers",
86 | "@com_google_absl//absl/status",
87 | "@com_google_absl//absl/strings",
88 | "@com_google_absl//absl/strings:string_view",
89 | "@google_cloud_cpp//:common",
90 | "@google_cloud_cpp//:storage",
91 | ],
92 | )
93 |
--------------------------------------------------------------------------------
/riegeli/zstd/BUILD:
--------------------------------------------------------------------------------
1 | load("@rules_cc//cc:defs.bzl", "cc_library")
2 |
3 | package(
4 | default_visibility = ["//visibility:public"],
5 | features = ["header_modules"],
6 | )
7 |
8 | licenses(["notice"])
9 |
# Zstd reader library (`zstd_reader.cc` / `zstd_reader.h`); depends on
# `:zstd_dictionary` and `@net_zstd//:zstd`.
10 | cc_library(
11 | name = "zstd_reader",
12 | srcs = ["zstd_reader.cc"],
13 | hdrs = ["zstd_reader.h"],
14 | # zstd_reader.cc has #define before #include to influence what the included
15 | # files provide.
16 | features = ["-use_header_modules"],
17 | deps = [
18 | ":zstd_dictionary",
19 | "//riegeli/base:arithmetic",
20 | "//riegeli/base:assert",
21 | "//riegeli/base:dependency",
22 | "//riegeli/base:initializer",
23 | "//riegeli/base:object",
24 | "//riegeli/base:recycling_pool",
25 | "//riegeli/base:status",
26 | "//riegeli/base:types",
27 | "//riegeli/bytes:buffer_options",
28 | "//riegeli/bytes:buffered_reader",
29 | "//riegeli/bytes:reader",
30 | "@com_google_absl//absl/base:core_headers",
31 | "@com_google_absl//absl/status",
32 | "@com_google_absl//absl/strings",
33 | "@net_zstd//:zstd",
34 | ],
35 | )
36 |
# Zstd writer library (`zstd_writer.cc` / `zstd_writer.h`); depends on
# `:zstd_dictionary`, `:zstd_reader`, and `@net_zstd//:zstd`.
37 | cc_library(
38 | name = "zstd_writer",
39 | srcs = ["zstd_writer.cc"],
40 | hdrs = ["zstd_writer.h"],
41 | # zstd_writer.cc has #define before #include to influence what the included
42 | # files provide.
43 | features = ["-use_header_modules"],
44 | deps = [
45 | ":zstd_dictionary",
46 | ":zstd_reader",
47 | "//riegeli/base:arithmetic",
48 | "//riegeli/base:assert",
49 | "//riegeli/base:dependency",
50 | "//riegeli/base:initializer",
51 | "//riegeli/base:object",
52 | "//riegeli/base:recycling_pool",
53 | "//riegeli/base:status",
54 | "//riegeli/base:types",
55 | "//riegeli/bytes:buffer_options",
56 | "//riegeli/bytes:buffered_writer",
57 | "//riegeli/bytes:reader",
58 | "//riegeli/bytes:writer",
59 | "@com_google_absl//absl/base:core_headers",
60 | "@com_google_absl//absl/status",
61 | "@com_google_absl//absl/strings",
62 | "@com_google_absl//absl/strings:string_view",
63 | "@net_zstd//:zstd",
64 | ],
65 | )
66 |
# Zstd dictionary library shared by `:zstd_reader` and `:zstd_writer`. Private
# visibility keeps it from being used outside this package.
67 | cc_library(
68 | name = "zstd_dictionary",
69 | srcs = ["zstd_dictionary.cc"],
70 | hdrs = ["zstd_dictionary.h"],
71 | # zstd_dictionary.cc has #define before #include to influence what the
72 | # included files provide.
73 | features = ["-use_header_modules"],
74 | visibility = ["//visibility:private"],
75 | deps = [
76 | "//riegeli/base:arithmetic",
77 | "//riegeli/base:bytes_ref",
78 | "//riegeli/base:initializer",
79 | "//riegeli/base:shared_ptr",
80 | "@com_google_absl//absl/base",
81 | "@com_google_absl//absl/base:core_headers",
82 | "@com_google_absl//absl/strings:string_view",
83 | "@com_google_absl//absl/synchronization",
84 | "@net_zstd//:zstd",
85 | ],
86 | )
87 |
--------------------------------------------------------------------------------
/riegeli/brotli/BUILD:
--------------------------------------------------------------------------------
1 | load("@rules_cc//cc:defs.bzl", "cc_library")
2 |
3 | package(
4 | default_visibility = ["//visibility:public"],
5 | features = ["header_modules"],
6 | )
7 |
8 | licenses(["notice"])
9 |
# Brotli reader library (`brotli_reader.cc` / `brotli_reader.h`); depends on
# the package-private `:brotli_allocator` and `:brotli_dictionary` targets and
# on the Brotli `brotlicommon` and `brotlidec` libraries.
10 | cc_library(
11 | name = "brotli_reader",
12 | srcs = ["brotli_reader.cc"],
13 | hdrs = ["brotli_reader.h"],
14 | deps = [
15 | ":brotli_allocator",
16 | ":brotli_dictionary",
17 | "//riegeli/base:arithmetic",
18 | "//riegeli/base:assert",
19 | "//riegeli/base:dependency",
20 | "//riegeli/base:initializer",
21 | "//riegeli/base:object",
22 | "//riegeli/base:shared_ptr",
23 | "//riegeli/base:status",
24 | "//riegeli/base:types",
25 | "//riegeli/bytes:pullable_reader",
26 | "//riegeli/bytes:reader",
27 | "@com_google_absl//absl/base:core_headers",
28 | "@com_google_absl//absl/status",
29 | "@com_google_absl//absl/strings",
30 | "@com_google_absl//absl/strings:string_view",
31 | "@org_brotli//:brotlicommon",
32 | "@org_brotli//:brotlidec",
33 | ],
34 | )
35 |
# Brotli writer library (`brotli_writer.cc` / `brotli_writer.h`); depends on
# `:brotli_allocator`, `:brotli_dictionary`, `:brotli_reader`, and the Brotli
# `brotlienc` library.
36 | cc_library(
37 | name = "brotli_writer",
38 | srcs = ["brotli_writer.cc"],
39 | hdrs = ["brotli_writer.h"],
40 | deps = [
41 | ":brotli_allocator",
42 | ":brotli_dictionary",
43 | ":brotli_reader",
44 | "//riegeli/base:arithmetic",
45 | "//riegeli/base:assert",
46 | "//riegeli/base:dependency",
47 | "//riegeli/base:initializer",
48 | "//riegeli/base:object",
49 | "//riegeli/base:shared_ptr",
50 | "//riegeli/base:status",
51 | "//riegeli/base:types",
52 | "//riegeli/bytes:buffer_options",
53 | "//riegeli/bytes:buffered_writer",
54 | "//riegeli/bytes:reader",
55 | "//riegeli/bytes:writer",
56 | "@com_google_absl//absl/base:core_headers",
57 | "@com_google_absl//absl/status",
58 | "@com_google_absl//absl/strings",
59 | "@com_google_absl//absl/strings:string_view",
60 | "@org_brotli//:brotlienc",
61 | ],
62 | )
63 |
# Brotli dictionary library shared by `:brotli_reader` and `:brotli_writer`.
# Private visibility keeps it from being used outside this package.
64 | cc_library(
65 | name = "brotli_dictionary",
66 | srcs = ["brotli_dictionary.cc"],
67 | hdrs = ["brotli_dictionary.h"],
68 | visibility = ["//visibility:private"],
69 | deps = [
70 | "//riegeli/base:assert",
71 | "//riegeli/base:bytes_ref",
72 | "//riegeli/base:initializer",
73 | "//riegeli/base:shared_ptr",
74 | "@com_google_absl//absl/base",
75 | "@com_google_absl//absl/base:core_headers",
76 | "@com_google_absl//absl/strings:string_view",
77 | "@com_google_absl//absl/types:span",
78 | "@org_brotli//:brotlicommon",
79 | "@org_brotli//:brotlienc",
80 | ],
81 | )
82 |
# Brotli allocator library shared by `:brotli_reader` and `:brotli_writer`.
# Private visibility keeps it from being used outside this package.
83 | cc_library(
84 | name = "brotli_allocator",
85 | srcs = ["brotli_allocator.cc"],
86 | hdrs = ["brotli_allocator.h"],
87 | visibility = ["//visibility:private"],
88 | deps = [
89 | "//riegeli/base:shared_ptr",
90 | "@org_brotli//:brotlicommon",
91 | ],
92 | )
93 |
--------------------------------------------------------------------------------
/riegeli/tensorflow/io/BUILD:
--------------------------------------------------------------------------------
1 | load("@rules_cc//cc:defs.bzl", "cc_library")
2 |
3 | package(
4 | default_visibility = ["//visibility:public"],
5 | features = ["header_modules"],
6 | )
7 |
8 | licenses(["notice"])
9 |
# TensorFlow-backed file reader library (`file_reader.cc` / `file_reader.h`).
# TensorFlow is pulled in through `@local_config_tf//:tf_header_lib`.
10 | cc_library(
11 | name = "file_reader",
12 | srcs = ["file_reader.cc"],
13 | hdrs = ["file_reader.h"],
14 | deps = [
15 | "//riegeli/base:arithmetic",
16 | "//riegeli/base:assert",
17 | "//riegeli/base:buffering",
18 | "//riegeli/base:chain",
19 | "//riegeli/base:dependency",
20 | "//riegeli/base:external_ref",
21 | "//riegeli/base:initializer",
22 | "//riegeli/base:null_safe_memcpy",
23 | "//riegeli/base:object",
24 | "//riegeli/base:reset",
25 | "//riegeli/base:sized_shared_buffer",
26 | "//riegeli/base:status",
27 | "//riegeli/base:types",
28 | "//riegeli/bytes:backward_writer",
29 | "//riegeli/bytes:buffer_options",
30 | "//riegeli/bytes:path_ref",
31 | "//riegeli/bytes:reader",
32 | "//riegeli/bytes:writer",
33 | "@com_google_absl//absl/base:core_headers",
34 | "@com_google_absl//absl/functional:function_ref",
35 | "@com_google_absl//absl/status",
36 | "@com_google_absl//absl/strings",
37 | "@com_google_absl//absl/strings:cord",
38 | "@com_google_absl//absl/strings:string_view",
39 | "@com_google_absl//absl/types:span",
40 | "@local_config_tf//:tf_header_lib",
41 | ],
42 | )
43 |
# TensorFlow-backed file writer library (`file_writer.cc` / `file_writer.h`);
# depends on `:file_reader` and on `@local_config_tf//:tf_header_lib`.
44 | cc_library(
45 | name = "file_writer",
46 | srcs = ["file_writer.cc"],
47 | hdrs = ["file_writer.h"],
48 | deps = [
49 | ":file_reader",
50 | "//riegeli/base:arithmetic",
51 | "//riegeli/base:assert",
52 | "//riegeli/base:buffering",
53 | "//riegeli/base:byte_fill",
54 | "//riegeli/base:chain",
55 | "//riegeli/base:dependency",
56 | "//riegeli/base:external_ref",
57 | "//riegeli/base:initializer",
58 | "//riegeli/base:object",
59 | "//riegeli/base:reset",
60 | "//riegeli/base:shared_buffer",
61 | "//riegeli/base:status",
62 | "//riegeli/base:types",
63 | "//riegeli/bytes:buffer_options",
64 | "//riegeli/bytes:path_ref",
65 | "//riegeli/bytes:reader",
66 | "//riegeli/bytes:writer",
67 | "@com_google_absl//absl/base:core_headers",
68 | "@com_google_absl//absl/status",
69 | "@com_google_absl//absl/strings",
70 | "@com_google_absl//absl/strings:cord",
71 | "@com_google_absl//absl/strings:string_view",
72 | "@local_config_tf//:tf_header_lib",
73 | ],
74 | )
75 |
# Header-only target (no `srcs`) exposing `tstring_writer.h`; built on
# `//riegeli/bytes:resizable_writer` and the TensorFlow header library.
76 | cc_library(
77 | name = "tstring_writer",
78 | hdrs = ["tstring_writer.h"],
79 | deps = [
80 | "//riegeli/base:arithmetic",
81 | "//riegeli/base:assert",
82 | "//riegeli/base:dependency",
83 | "//riegeli/base:initializer",
84 | "//riegeli/base:object",
85 | "//riegeli/bytes:resizable_writer",
86 | "@local_config_tf//:tf_header_lib",
87 | ],
88 | )
89 |
--------------------------------------------------------------------------------
/python/riegeli/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Writes or reads Riegeli/records files."""
15 |
16 | from riegeli.base import riegeli_error
17 | from riegeli.records import record_position
18 | from riegeli.records import record_reader
19 | from riegeli.records import record_writer
20 | from riegeli.records import records_metadata_pb2
21 | from riegeli.records import skipped_region
22 |
23 | __all__ = (  # Public names of the package; every entry is bound as a module-level alias further down in this file.
24 | 'RiegeliError',  # This entry through 'DataLossError': aliases of attributes of the riegeli_error module.
25 | 'CancelledError',
26 | 'UnknownError',
27 | 'InvalidArgumentError',
28 | 'DeadlineExceededError',
29 | 'NotFoundError',
30 | 'AlreadyExistsError',
31 | 'PermissionDeniedError',
32 | 'UnauthenticatedError',
33 | 'ResourceExhaustedError',
34 | 'FailedPreconditionError',
35 | 'AbortedError',
36 | 'OutOfRangeError',
37 | 'UnimplementedError',
38 | 'InternalError',
39 | 'UnavailableError',
40 | 'DataLossError',
41 | 'FlushType',  # Alias of record_writer.FlushType.
42 | 'RecordPosition',  # Alias of record_position.RecordPosition.
43 | 'SkippedRegion',  # Alias of skipped_region.SkippedRegion.
44 | 'RecordsMetadata',  # Alias of records_metadata_pb2.RecordsMetadata.
45 | 'set_record_type',  # This entry and 'RecordWriter': aliases of record_writer attributes.
46 | 'RecordWriter',
47 | 'EXISTENCE_ONLY',  # This entry through 'RecordReader': aliases of record_reader attributes.
48 | 'get_record_type',
49 | 'RecordReader',
50 | )
51 |
52 | # pylint: disable=invalid-name
53 | RiegeliError = riegeli_error.RiegeliError  # Re-exports: from here on, each line binds a submodule attribute to the same name at package level.
54 | CancelledError = riegeli_error.CancelledError
55 | UnknownError = riegeli_error.UnknownError
56 | InvalidArgumentError = riegeli_error.InvalidArgumentError
57 | DeadlineExceededError = riegeli_error.DeadlineExceededError
58 | NotFoundError = riegeli_error.NotFoundError
59 | AlreadyExistsError = riegeli_error.AlreadyExistsError
60 | PermissionDeniedError = riegeli_error.PermissionDeniedError
61 | UnauthenticatedError = riegeli_error.UnauthenticatedError
62 | ResourceExhaustedError = riegeli_error.ResourceExhaustedError
63 | FailedPreconditionError = riegeli_error.FailedPreconditionError
64 | AbortedError = riegeli_error.AbortedError
65 | OutOfRangeError = riegeli_error.OutOfRangeError
66 | UnimplementedError = riegeli_error.UnimplementedError
67 | InternalError = riegeli_error.InternalError
68 | UnavailableError = riegeli_error.UnavailableError
69 | DataLossError = riegeli_error.DataLossError  # Last of the aliases taken from riegeli_error.
70 | RecordPosition = record_position.RecordPosition
71 | SkippedRegion = skipped_region.SkippedRegion
72 | RecordsMetadata = records_metadata_pb2.RecordsMetadata  # The _pb2 module name suggests protobuf-generated code -- confirm.
73 | FlushType = record_writer.FlushType  # First of three aliases taken from record_writer.
74 | set_record_type = record_writer.set_record_type
75 | RecordWriter = record_writer.RecordWriter
76 | EXISTENCE_ONLY = record_reader.EXISTENCE_ONLY  # First of three aliases taken from record_reader.
77 | get_record_type = record_reader.get_record_type
78 | RecordReader = record_reader.RecordReader
79 |
--------------------------------------------------------------------------------
/riegeli/csv/BUILD:
--------------------------------------------------------------------------------
1 | load("@rules_cc//cc:defs.bzl", "cc_library")
2 |
3 | package(  # Package-wide defaults for the targets declared in this BUILD file.
4 | default_visibility = ["//visibility:public"],  # Targets that set no visibility of their own are visible to every package.
5 | features = ["header_modules"],  # Requests the "header_modules" feature for this package's targets.
6 | )
7 |
8 | licenses(["notice"])  # Declares the "notice" license type for this package.
9 |
10 | cc_library(  # Target ":csv_reader": compiles csv_reader.cc and exposes csv_reader.h.
11 | name = "csv_reader",
12 | srcs = ["csv_reader.cc"],
13 | hdrs = ["csv_reader.h"],
14 | deps = [
15 | ":csv_record",  # Same-package target, declared later in this BUILD file.
16 | "//riegeli/base:arithmetic",  # Labels starting with //riegeli/ are targets in other packages of this repository.
17 | "//riegeli/base:assert",
18 | "//riegeli/base:debug",
19 | "//riegeli/base:dependency",
20 | "//riegeli/base:initializer",
21 | "//riegeli/base:object",
22 | "//riegeli/base:reset",
23 | "//riegeli/base:status",
24 | "//riegeli/bytes:reader",
25 | "//riegeli/bytes:string_reader",
26 | "//riegeli/bytes:string_writer",
27 | "//riegeli/lines:line_reading",
28 | "@com_google_absl//absl/base:core_headers",  # Labels starting with @com_google_absl are targets in that external repository.
29 | "@com_google_absl//absl/status",
30 | "@com_google_absl//absl/strings",
31 | "@com_google_absl//absl/strings:string_view",
32 | "@com_google_absl//absl/types:span",
33 | ],
34 | )
35 |
36 | cc_library(  # Target ":csv_writer": compiles csv_writer.cc and exposes csv_writer.h.
37 | name = "csv_writer",
38 | srcs = ["csv_writer.cc"],
39 | hdrs = ["csv_writer.h"],
40 | deps = [
41 | ":csv_record",  # Same-package target, declared later in this BUILD file.
42 | "//riegeli/base:arithmetic",  # Labels starting with //riegeli/ are targets in other packages of this repository.
43 | "//riegeli/base:assert",
44 | "//riegeli/base:debug",
45 | "//riegeli/base:dependency",
46 | "//riegeli/base:initializer",
47 | "//riegeli/base:iterable",
48 | "//riegeli/base:object",
49 | "//riegeli/base:reset",
50 | "//riegeli/base:status",
51 | "//riegeli/bytes:string_writer",
52 | "//riegeli/bytes:writer",
53 | "//riegeli/lines:line_writing",
54 | "//riegeli/lines:newline",
55 | "@com_google_absl//absl/base:core_headers",  # Labels starting with @com_google_absl are targets in that external repository.
56 | "@com_google_absl//absl/status",
57 | "@com_google_absl//absl/strings",
58 | "@com_google_absl//absl/strings:string_view",
59 | ],
60 | )
61 |
62 | cc_library(  # Target ":csv_record": compiles csv_record.cc and exposes csv_record.h; listed in the deps of :csv_reader and :csv_writer.
63 | name = "csv_record",
64 | srcs = ["csv_record.cc"],
65 | hdrs = ["csv_record.h"],
66 | deps = [
67 | "//riegeli/base:arithmetic",  # Labels starting with //riegeli/ are targets in other packages of this repository.
68 | "//riegeli/base:assert",
69 | "//riegeli/base:compare",
70 | "//riegeli/base:global",
71 | "//riegeli/base:initializer",
72 | "//riegeli/base:iterable",
73 | "//riegeli/base:reset",
74 | "//riegeli/base:shared_ptr",
75 | "//riegeli/base:string_ref",
76 | "//riegeli/base:type_traits",
77 | "//riegeli/bytes:ostream_writer",
78 | "//riegeli/bytes:string_writer",
79 | "//riegeli/bytes:stringify_writer",
80 | "//riegeli/bytes:writer",
81 | "@com_google_absl//absl/base",  # Labels starting with @com_google_absl are targets in that external repository.
82 | "@com_google_absl//absl/base:core_headers",
83 | "@com_google_absl//absl/container:flat_hash_map",
84 | "@com_google_absl//absl/status",
85 | "@com_google_absl//absl/strings",
86 | "@com_google_absl//absl/strings:string_view",
87 | "@com_google_absl//absl/synchronization",
88 | "@com_google_absl//absl/types:span",
89 | ],
90 | )
91 |
--------------------------------------------------------------------------------