70 template <
typename ref_id_type,
71 typename ref_id_tmp_type,
73 typename ref_seqs_type>
75 ref_id_tmp_type & ref_id_tmp,
79 template <
typename align_type,
typename ref_seqs_type>
82 [[maybe_unused]] int32_t rid,
83 [[maybe_unused]] ref_seqs_type & ref_seqs,
84 [[maybe_unused]] int32_t ref_start,
89 template <
typename stream_view_t>
92 template <
typename stream_view_type, std::ranges::forward_range target_range_type>
95 template <
typename stream_view_t, arithmetic arithmetic_target_type>
98 template <
typename stream_view_type,
typename ref_
ids_type,
typename ref_seqs_type>
103 template <
typename stream_t,
typename ref_
ids_type>
119template <
typename ref_id_type,
120 typename ref_id_tmp_type,
121 typename header_type,
122 typename ref_seqs_type>
124 ref_id_tmp_type & ref_id_tmp,
125 header_type & header,
128 if (!std::ranges::empty(ref_id_tmp))
130 auto search = header.ref_dict.find(ref_id_tmp);
132 if (
search == header.ref_dict.end())
134 if constexpr(detail::decays_to_ignore_v<ref_seqs_type>)
138 throw format_error{
"Unknown reference id found in record which is not present in the header."};
142 header.ref_ids().push_back(ref_id_tmp);
144 header.ref_dict[header.ref_ids()[pos]] = pos;
150 throw format_error{
"Unknown reference id found in record which is not present in the given ids."};
167 int32_t & sc_end)
const
170 auto soft_clipping_at = [&] (
size_t const index) {
return cigar_vector[index] ==
'S'_cigar_operation; };
172 auto hard_clipping_at = [&] (
size_t const index) {
return cigar_vector[index] ==
'H'_cigar_operation; };
174 auto vector_size_at_least = [&] (
size_t const min_size) {
return cigar_vector.
size() >= min_size; };
176 auto cigar_count_at = [&] (
size_t const index) {
return get<0>(cigar_vector[index]); };
179 if (vector_size_at_least(1) && soft_clipping_at(0))
180 sc_begin = cigar_count_at(0);
181 else if (vector_size_at_least(2) && hard_clipping_at(0) && soft_clipping_at(1))
182 sc_begin = cigar_count_at(1);
187 auto last_index = cigar_vector.
size() - 1;
188 auto second_last_index = last_index - 1;
190 if (vector_size_at_least(2) && soft_clipping_at(last_index))
191 sc_end = cigar_count_at(last_index);
192 else if (vector_size_at_least(3) && hard_clipping_at(last_index) && soft_clipping_at(second_last_index))
193 sc_end = cigar_count_at(second_last_index);
206template <
typename align_type,
typename ref_seqs_type>
209 [[maybe_unused]] int32_t rid,
210 [[maybe_unused]] ref_seqs_type & ref_seqs,
211 [[maybe_unused]] int32_t ref_start,
214 if (rid > -1 && ref_start > -1 &&
215 !cigar_vector.
empty() &&
216 !std::ranges::empty(get<1>(align)))
218 if constexpr (!detail::decays_to_ignore_v<ref_seqs_type>)
220 assert(
static_cast<size_t>(ref_start + ref_length) <=
std::ranges::size(ref_seqs[rid]));
222 assign_unaligned(get<0>(align), ref_seqs[rid] |
views::slice(ref_start, ref_start + ref_length));
227 auto dummy_seq =
views::repeat_n(std::ranges::range_value_t<unaligned_t>{}, ref_length)
229 static_assert(std::same_as<unaligned_t,
decltype(dummy_seq)>,
230 "No reference information was given so the type of the first alignment tuple position"
231 "must have an unaligned sequence type of a dummy sequence ("
232 "views::repeat_n(dna5{}, size_t{}) | "
233 "std::views::transform(detail::access_restrictor_fn{}))");
235 assign_unaligned(get<0>(align), dummy_seq);
243 if constexpr (!detail::decays_to_ignore_v<ref_seqs_type>)
246 assign_unaligned(get<0>(align), ref_seqs[0] |
views::slice(0, 0));
251 assign_unaligned(get<0>(align),
views::repeat_n(std::ranges::range_value_t<unaligned_t>{}, 0)
266template <
typename stream_view_t>
277 if (res.ec == std::errc::invalid_argument || res.ptr != end)
280 "' could not be cast into type uint8_t."};
282 if (res.ec == std::errc::result_out_of_range)
284 "' into type uint8_t would cause an overflow."};
295template <
typename stream_view_type, std::ranges::forward_range target_range_type>
298 using target_range_value_t = std::ranges::range_value_t<target_range_type>;
299 using begin_iterator_t = std::ranges::iterator_t<stream_view_type>;
300 using end_iterator_t = std::ranges::sentinel_t<stream_view_type>;
304 if (
auto it = std::ranges::begin(stream_view); it != std::ranges::end(stream_view))
307 if (
char c = *it; !(++it == std::ranges::end(stream_view) && c ==
'*'))
310 std::ranges::copy(std::ranges::subrange<begin_iterator_t, end_iterator_t>{it, std::ranges::end(stream_view)}
311 | views::char_to<target_range_value_t>,
327template <
typename stream_view_t, arithmetic arithmetic_target_type>
335 if (res.ec == std::errc::invalid_argument || res.ptr != end)
338 "' could not be cast into type " +
339 detail::type_name_as_string<arithmetic_target_type>};
341 if (res.ec == std::errc::result_out_of_range)
343 "' into type " + detail::type_name_as_string<arithmetic_target_type> +
344 " would cause an overflow."};
363template <
typename stream_view_type,
typename ref_
ids_type,
typename ref_seqs_type>
368 auto it = std::ranges::begin(stream_view);
369 auto end = std::ranges::end(stream_view);
372 auto make_tag = [] (uint8_t char1, uint8_t char2)
constexpr
374 return static_cast<uint16_t
>(char1) | (
static_cast<uint16_t
>(char2) << CHAR_BIT);
379 auto parse_and_make_tag = [&] ()
385 return make_tag(raw_tag[0], raw_tag[1]);
388 auto take_until_predicate = [&it, &string_buffer] (
auto const & predicate)
390 string_buffer.clear();
391 while (!predicate(*it))
393 string_buffer.push_back(*it);
398 auto skip_until_predicate = [&it] (
auto const & predicate)
400 while (!predicate(*it))
404 auto copy_next_tag_value_into_buffer = [&] ()
406 skip_until_predicate(is_char<':'>);
408 take_until_predicate(is_char<'\t'> || is_char<'\n'>);
420 take_until_predicate(is_char<'\t'> || is_char<'\n'>);
428 auto print_cerr_of_unspported_tag = [&it] (
char const *
const header_tag,
std::array<char, 2> raw_tag)
430 std::cerr <<
"Unsupported SAM header tag in @" << header_tag <<
": " << raw_tag[0] << raw_tag[1] <<
'\n';
433 while (it != end && is_char<'@'>(*it))
437 switch (parse_and_make_tag())
439 case make_tag(
'H',
'D'):
442 while (is_char<'\t'>(*it))
447 switch (parse_and_make_tag())
449 case make_tag(
'V',
'N'):
454 case make_tag(
'S',
'O'):
459 case make_tag(
'S',
'S'):
464 case make_tag(
'G',
'O'):
471 print_cerr_of_unspported_tag(
"HD", raw_tag);
475 if (header_entry !=
nullptr)
477 copy_next_tag_value_into_buffer();
481 skip_until_predicate(is_char<'\t'> || is_char<'\n'>);
491 case make_tag(
'S',
'Q'):
494 std::ranges::range_value_t<
decltype(hdr.
ref_ids())>
id;
499 while (is_char<'\t'>(*it))
503 switch (parse_and_make_tag())
505 case make_tag(
'S',
'N'):
507 copy_next_tag_value_into_buffer();
511 case make_tag(
'L',
'N'):
513 int32_t sequence_length_tmp{};
514 copy_next_tag_value_into_buffer();
516 sequence_length = sequence_length_tmp;
521 parse_and_append_unhandled_tag_to_string(get<1>(info), raw_tag);
529 if (!sequence_length.has_value())
531 if (sequence_length.value() <= 0)
534 get<0>(info) = sequence_length.value();
537 if constexpr (!detail::decays_to_ignore_v<ref_seqs_type>)
543 "(header.ref_ids(): ", hdr.
ref_ids(),
").")};
545 auto & given_ref_info = hdr.
ref_id_info[id_it->second];
547 if (std::get<0>(given_ref_info) != std::get<0>(info))
548 throw format_error{
"Provided and header-based reference length differ."};
554 static_assert(!detail::is_type_specialisation_of_v<
decltype(hdr.
ref_ids()),
std::deque>,
555 "The range over reference ids must be of type std::deque such that pointers are not "
565 case make_tag(
'R',
'G'):
570 while (is_char<'\t'>(*it))
574 switch (parse_and_make_tag())
576 case make_tag(
'I',
'D'):
578 copy_next_tag_value_into_buffer();
584 parse_and_append_unhandled_tag_to_string(get<1>(tmp), raw_tag);
590 if (get<0>(tmp).empty())
597 case make_tag(
'P',
'G'):
602 while (is_char<'\t'>(*it))
607 switch (parse_and_make_tag())
609 case make_tag(
'I',
'D'):
614 case make_tag(
'P',
'N'):
619 case make_tag(
'P',
'P'):
624 case make_tag(
'C',
'L'):
629 case make_tag(
'D',
'S'):
634 case make_tag(
'V',
'N'):
641 print_cerr_of_unspported_tag(
"PG", raw_tag);
645 if (program_info_entry !=
nullptr)
647 copy_next_tag_value_into_buffer();
651 skip_until_predicate(is_char<'\t'> || is_char<'\n'>);
662 case make_tag(
'C',
'O'):
666 take_until_predicate(is_char<'\n'>);
695template <
typename stream_t,
typename ref_
ids_type>
707 !(header.
sorting ==
"unknown" ||
708 header.
sorting ==
"unsorted" ||
709 header.
sorting ==
"queryname" ||
710 header.
sorting ==
"coordinate" ))
711 throw format_error{
"SAM format error: The header.sorting member must be "
712 "one of [unknown, unsorted, queryname, coordinate]."};
718 throw format_error{
"SAM format error: The header.grouping member must be "
719 "one of [none, query, reference]."};
738 stream <<
"@HD\tVN:";
742 stream <<
"\tSO:" << header.
sorting;
748 stream <<
"\tGO:" << header.
grouping;
755 stream <<
"@SQ\tSN:";
757 std::ranges::copy(ref_name, stream_it);
759 stream <<
"\tLN:" << get<0>(ref_info);
761 if (!get<1>(ref_info).empty())
762 stream <<
"\t" << get<1>(ref_info);
771 <<
"\tID:" << get<0>(read_group);
773 if (!get<1>(read_group).empty())
774 stream <<
"\t" << get<1>(read_group);
783 <<
"\tID:" << program.id;
785 if (!program.name.empty())
786 stream <<
"\tPN:" << program.name;
788 if (!program.command_line_call.empty())
789 stream <<
"\tCL:" << program.command_line_call;
791 if (!program.previous.empty())
792 stream <<
"\tPP:" << program.previous;
794 if (!program.description.empty())
795 stream <<
"\tDS:" << program.description;
797 if (!program.version.empty())
798 stream <<
"\tVN:" << program.version;
T back_inserter(T... args)
Provides seqan3::views::char_to.
Provides various utility functions.
T emplace_back(T... args)
constexpr auto assign_char_to
Assign a character to an alphabet object.
Definition: concept.hpp:526
void alignment_from_cigar(alignment_type &alignment, std::vector< cigar > const &cigar_vector)
Transforms a std::vector of operation-count pairs (representing the cigar string).
Definition: cigar.hpp:381
constexpr void write_eol(it_t &it, bool const add_cr)
Write "\n" or "\r\n" to the stream iterator, depending on arguments.
Definition: misc.hpp:49
@ comment
Comment field of arbitrary content, usually a string.
@ ref_id
The identifier of the (reference) sequence that seqan3::field::seq was aligned to.
decltype(detail::transform< trait_t >(list_t{})) transform
Apply a transformation trait to every type in the list and return a seqan3::type_list of the results.
Definition: traits.hpp:495
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:151
constexpr auto slice
A view adaptor that returns a half-open interval on the underlying range.
Definition: slice.hpp:183
constexpr auto zip
A zip view.
Definition: zip.hpp:29
constexpr auto repeat_n
A view factory that repeats a given value n times.
Definition: repeat_n.hpp:91
Provides various utility functions.
Auxiliary functions for the alignment IO.
The internal SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
std::string to_string(value_type &&...values)
Streams all parameters via the seqan3::debug_stream and returns a concatenated string.
Definition: to_string.hpp:29
Provides seqan3::debug_stream and related types.
The <ranges> header from C++20's standard library.
Provides seqan3::views::repeat_n.
Provides seqan3::views::slice.
A functor that always throws when calling operator() (needed for the alignment "dummy" sequence).
Definition: cigar.hpp:434
Object storing information for a search (of a search scheme).
Definition: search_scheme_precomputed.hpp:28
The options type defines various option members that influence the behavior of all or some formats.
Definition: output_options.hpp:26
bool add_carriage_return
The default plain text line-ending is "\n", but on Windows an additional carriage return is recommend...
Definition: output_options.hpp:30
Provides traits to inspect some information of a type, for example its name.
Provides seqan3::views::zip.