Verified commit c5230713, authored by nagayama15

feat: implement extraction by chunk reordering method

parent 148a72e9
......@@ -4,6 +4,7 @@
#include "Ordering.hpp"
#include <algorithm>
#include "BitStreamWriter.hpp"
#include "CircularBitStreamReader.hpp"
namespace kyut {
......@@ -13,23 +14,31 @@ namespace kyut {
0, 0, 1, 2, 4, 6, 9, 12, 15, 18, 21, 25, 28, 32, 36, 40, 44, 48, 52, 56, 61};
template <typename RandomAccessIterator, typename Less>
inline void embed_in_chunk(
std::uint64_t watermark,
inline std::size_t embed_in_chunk(
CircularBitStreamReader& r,
RandomAccessIterator begin,
RandomAccessIterator end,
Less less) {
assert(std::distance(begin, end) >= 0);
assert(std::distance(begin, end) <= std::ptrdiff_t{max_chunk_size});
std::sort(begin, end, less);
const std::size_t count = std::distance(begin, end);
const auto bit_width = factorial_bit_width_table[count];
std::uint64_t watermark = r.read(bit_width);
for (auto it = begin; it != end; ++it) {
const std::size_t count = std::distance(it, end);
// Sort the chunk.
std::sort(begin, end, less);
const std::uint64_t w = watermark % count;
watermark /= count;
// Embed watermark.
for (std::size_t i = 0; i < count; i++) {
const std::uint64_t w = watermark % (count - i);
watermark /= (count - i);
const auto it = begin + i;
std::iter_swap(it, it + w);
}
return bit_width;
}
template <typename RandomAccessIterator, typename Less>
......@@ -50,12 +59,79 @@ namespace kyut {
const auto chunk_begin = begin + i;
const auto chunk_end = chunk_begin + chunk_size;
const auto bit_width = factorial_bit_width_table[chunk_size];
const auto watermark = r.read(bit_width);
size_bits += embed_in_chunk(r, chunk_begin, chunk_end, less);
}
return size_bits;
}
// Recovers the watermark encoded in the ordering of a single chunk
// `[begin, end)` and appends it to `w`.
//
// The chunk itself is left untouched: a side table of iterators, ordered by
// `less` on the pointed-to values, stands in for the sorted chunk. Replaying
// the swaps that lead from that sorted order to the actual order yields one
// mixed-radix digit per position; the digits are accumulated into a single
// integer which is written with `factorial_bit_width_table[count]` bits.
//
// Preconditions (asserted): `begin <= end` and the chunk holds at most
// `max_chunk_size` elements.
//
// Returns the number of bits written to `w`.
//
// NOTE(review): for an ordering that was not produced by the matching embed
// step, the accumulated value may need more than `bit_width` bits; what
// `BitStreamWriter::write` does in that case is not visible here - confirm.
template <typename RandomAccessIterator, typename Less>
inline std::size_t extract_from_chunk(
    BitStreamWriter& w,
    RandomAccessIterator begin,
    RandomAccessIterator end,
    Less less) {
    const auto length = std::distance(begin, end);
    assert(length >= 0);
    assert(length <= std::ptrdiff_t{max_chunk_size});

    const std::size_t count = length;

    // Build the sorted view: order[k] points at the k-th smallest element.
    std::vector<RandomAccessIterator> order{};
    order.reserve(count);
    for (std::size_t k = 0; k < count; ++k) {
        order.emplace_back(begin + k);
    }

    std::sort(
        order.begin(),
        order.end(),
        [&less](const RandomAccessIterator& lhs, const RandomAccessIterator& rhs) { return less(*lhs, *rhs); });

    // Extract watermark, least significant digit first.
    std::uint64_t watermark = 0;
    std::uint64_t radix = 1;

    auto target = begin;            // Position in the chunk being decoded.
    std::size_t remaining = count;  // Number of slots not yet decoded.

    for (auto slot = order.begin(); slot != order.end(); ++slot, ++target, --remaining) {
        // Locate, among the undecoded slots, the one that refers to `target`.
        const auto match = std::find(slot, order.end(), target);
        assert(match != order.end());

        const std::size_t digit = std::distance(slot, match);
        watermark += digit * radix;
        radix *= remaining;

        // Mirror the swap performed while embedding so later digits line up.
        std::iter_swap(slot, match);
    }

    const auto bit_width = factorial_bit_width_table[count];
    w.write(watermark, bit_width);

    return bit_width;
}
template <typename RandomAccessIterator, typename Less>
inline std::size_t extract_by_ordering(
BitStreamWriter& w,
std::size_t chunk_size,
RandomAccessIterator begin,
RandomAccessIterator end,
Less less) {
assert(2 <= chunk_size && chunk_size <= max_chunk_size);
assert(std::distance(begin, end) >= 0);
const std::size_t count = std::distance(begin, end);
embed_in_chunk(watermark, chunk_begin, chunk_end, less);
std::size_t size_bits = 0;
for (std::size_t i = 0; i < count; i += chunk_size) {
const std::size_t chunk_size = (std::min)(chunk_size, count - i);
const auto chunk_begin = begin + i;
const auto chunk_end = chunk_begin + chunk_size;
size_bits += bit_width;
size_bits += extract_from_chunk(w, chunk_begin, chunk_end, less);
}
return size_bits;
......@@ -71,6 +147,16 @@ namespace kyut {
Less less) {
return detail::embed_by_ordering(r, chunk_size, begin, end, less);
}
// Public entry point for watermark extraction by chunk reordering.
//
// Forwards every argument unchanged to `detail::extract_by_ordering` and
// returns whatever that call returns.
template <typename RandomAccessIterator, typename Less>
inline std::size_t extract_by_ordering(
    BitStreamWriter& w,
    std::size_t chunk_size,
    RandomAccessIterator begin,
    RandomAccessIterator end,
    Less less) {
    const std::size_t extracted_bits = detail::extract_by_ordering(w, chunk_size, begin, end, less);
    return extracted_bits;
}
} // namespace kyut
#endif // INCLUDE_kyut_Ordering_inl_hpp
......@@ -5,6 +5,7 @@
namespace kyut {
class CircularBitStreamReader;
class BitStreamWriter;
constexpr std::size_t max_chunk_size = 20;
......@@ -15,6 +16,14 @@ namespace kyut {
RandomAccessIterator begin,
RandomAccessIterator end,
Less less);
// Extracts a watermark from the ordering of the elements in `[begin, end)`.
//
// The range is processed in consecutive chunks of at most `chunk_size`
// elements; the bits recovered from each chunk are written to `w`.
// `chunk_size` is asserted to lie in `[2, max_chunk_size]` by the
// implementation.
//
// `less` is the ordering used to compare elements of the range.
//
// Returns the total number of bits written to `w`.
template <typename RandomAccessIterator, typename Less>
std::size_t extract_by_ordering(
BitStreamWriter& w,
std::size_t chunk_size,
RandomAccessIterator begin,
RandomAccessIterator end,
Less less);
} // namespace kyut
#include "Ordering-inl.hpp"
......
......@@ -38,3 +38,35 @@ TEST(kyut_Ordering, embed_by_ordering) {
check_embed("2314", "\x50"sv, 20, 4, "2314");
check_embed("2314", "\x00"sv, 20, 4, "1234");
}
namespace {
void check_extract(
std::string data,
std::size_t chunk_size,
std::size_t expected_size_bits_extracted,
std::string_view expected_watermark_extracted) {
kyut::BitStreamWriter w{};
const auto size_bits = kyut::extract_by_ordering(
w,
chunk_size,
std::begin(data),
std::end(data),
std::less<>{});
EXPECT_EQ(size_bits, expected_size_bits_extracted);
EXPECT_EQ(w.position_bits(), size_bits);
EXPECT_EQ(w.data_as_str(), expected_watermark_extracted);
}
} // namespace
// Each input is a single four-element chunk (the chunk size limit of 20
// exceeds the data length), for which four bits are expected. The extracted
// value ends up in the high nibble of the single output byte.
TEST(kyut_Ordering, extract_by_ordering) {
    using namespace std::string_view_literals;

    constexpr std::size_t chunk_limit = 20;
    constexpr std::size_t expected_bits = 4;

    // Already sorted: no swaps, watermark 0.
    check_extract("1234", chunk_limit, expected_bits, "\x00"sv);

    // First digit varies (which element was swapped into position 0).
    check_extract("2134", chunk_limit, expected_bits, "\x10"sv);
    check_extract("3214", chunk_limit, expected_bits, "\x20"sv);
    check_extract("4231", chunk_limit, expected_bits, "\x30"sv);

    // Second digit contributes with weight 4.
    check_extract("1324", chunk_limit, expected_bits, "\x40"sv);
    check_extract("2314", chunk_limit, expected_bits, "\x50"sv);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment