|
|
|
@@ -158,7 +158,7 @@ py::array build_sample_idx(const py::array_t<int32_t>& sizes_,
|
|
|
|
|
doc_offset += (remaining_seq_length + doc_length - 1);
|
|
|
|
|
remaining_seq_length = 0;
|
|
|
|
|
} else {
|
|
|
|
|
// Otherwise, start from the begining of the next document.
|
|
|
|
|
// Otherwise, start from the beginning of the next document.
|
|
|
|
|
++doc_idx_index;
|
|
|
|
|
doc_offset = 0;
|
|
|
|
|
}
|
|
|
|
@@ -296,7 +296,7 @@ py::array build_mapping_impl(const py::array_t<int64_t>& docs_,
|
|
|
|
|
const auto sent_index_first = docs[doc];
|
|
|
|
|
const auto sent_index_last = docs[doc + 1];
|
|
|
|
|
|
|
|
|
|
// At the begining of the document previous index is the
|
|
|
|
|
// At the beginning of the document previous index is the
|
|
|
|
|
// start index.
|
|
|
|
|
auto prev_start_index = sent_index_first;
|
|
|
|
|
|
|
|
|
@@ -556,7 +556,7 @@ py::array build_blocks_mapping_impl(const py::array_t<int64_t>& docs_,
|
|
|
|
|
const auto sent_index_last = docs[doc + 1];
|
|
|
|
|
const auto target_seq_len = max_seq_length - titles_sizes[doc];
|
|
|
|
|
|
|
|
|
|
// At the begining of the document previous index is the
|
|
|
|
|
// At the beginning of the document previous index is the
|
|
|
|
|
// start index.
|
|
|
|
|
auto prev_start_index = sent_index_first;
|
|
|
|
|
|
|
|
|
|