SeqAn3 3.2.0-rc.1
The Modern C++ library for sequence analysis.
format_sam_base.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6// -----------------------------------------------------------------------------------------------------
7
14#pragma once
15
16#include <seqan3/std/ranges>
17#include <string>
18#include <vector>
19
31
32namespace seqan3::detail
33{
34
// Body of seqan3::detail::format_sam_base, the base the listing describes as "The alignment base format".
// NOTE(review): the class-head line (original line 43) and the declarations on original lines 51-53 and 68
// (move constructor, both assignment operators, `bool ref_info_present_in_header`) are not present in this
// listing; they are only referenced by the documentation index at the end of the page.
44{
45protected:
// All special member functions are defaulted and protected.
49 format_sam_base() = default;
50 format_sam_base(format_sam_base const &) = default;
54 ~format_sam_base() = default;
55
57
// The format version that write_header() emits after "@HD\tVN:".
59 static constexpr std::array format_version{'1', '.', '6'};
60
// Scratch buffer: read_byte_field() and read_arithmetic_field() copy a field into it before calling
// std::from_chars.
62 std::array<char, 316> arithmetic_buffer{}; // Doubles can be up to 316 characters
63
// Tracks whether the content of the header has been written or not (not read by any function in this file).
65 bool header_was_written{false};
66
69
// Looks ref_id_tmp up in header.ref_dict and assigns the stored position to ref_id; may register a new id
// or throw format_error when the id is unknown.
70 template <typename ref_id_type,
71 typename ref_id_tmp_type,
72 typename header_type,
73 typename ref_seqs_type>
74 void check_and_assign_ref_id(ref_id_type & ref_id,
75 ref_id_tmp_type & ref_id_tmp,
76 header_type & header,
77 ref_seqs_type & /*tag*/);
78
// Assigns the unaligned reference part to get<0>(align) and applies the cigar information if available.
79 template <typename align_type, typename ref_seqs_type>
80 void construct_alignment(align_type & align,
81 std::vector<cigar> & cigar_vector,
82 [[maybe_unused]] int32_t rid,
83 [[maybe_unused]] ref_seqs_type & ref_seqs,
84 [[maybe_unused]] int32_t ref_start,
85 size_t ref_length);
86
// Copies leading/trailing soft clipping counts from cigar_vector into sc_begin and sc_end.
87 void transfer_soft_clipping_to(std::vector<cigar> const & cigar_vector, int32_t & sc_begin, int32_t & sc_end) const;
88
// Parses a hexadecimal field into a std::byte.
89 template <typename stream_view_t>
90 void read_byte_field(stream_view_t && stream_view, std::byte & byte_target);
91
// Appends the characters of a field to target, converting them via views::char_to; a lone '*' appends nothing.
92 template <typename stream_view_type, std::ranges::forward_range target_range_type>
93 void read_forward_range_field(stream_view_type && stream_view, target_range_type & target);
94
// Parses an arithmetic field with std::from_chars.
95 template <typename stream_view_t, arithmetic arithmetic_target_type>
96 void read_arithmetic_field(stream_view_t && stream_view, arithmetic_target_type & arithmetic_target);
97
// Reads all leading '@' header lines from stream_view.
// NOTE(review): the second parameter (original line 100) is missing from this listing.
98 template <typename stream_view_type, typename ref_ids_type, typename ref_seqs_type>
99 void read_header(stream_view_type && stream_view,
101 ref_seqs_type & /*ref_id_to_pos_map*/);
102
// Validates and writes the SAM header to stream.
// NOTE(review): the last parameter (original line 106) is missing from this listing.
103 template <typename stream_t, typename ref_ids_type>
104 void write_header(stream_t & stream,
105 sam_file_output_options const & options,
107};
108
// Resolves the reference name ref_id_tmp read from a record to its position and stores it in ref_id.
//
// Nothing happens if ref_id_tmp is empty. If the name is found in header.ref_dict, the mapped position is
// assigned. If it is not found:
//  - with reference information given (ref_seqs_type is not ignore), a format_error is thrown;
//  - without reference information, either a format_error is thrown or the name is appended to
//    header.ref_ids(), registered in header.ref_dict and its new position assigned to ref_id.
// NOTE(review): the condition choosing between those last two branches (original line 136) is missing from
// this listing; judging by the error text it tests whether the header contained reference information
// (presumably the member ref_info_present_in_header) - confirm against the upstream header.
119template <typename ref_id_type,
120 typename ref_id_tmp_type,
121 typename header_type,
122 typename ref_seqs_type>
123inline void format_sam_base::check_and_assign_ref_id(ref_id_type & ref_id,
124 ref_id_tmp_type & ref_id_tmp,
125 header_type & header,
126 ref_seqs_type & /*tag*/)
127{
128 if (!std::ranges::empty(ref_id_tmp)) // otherwise the std::optional will not be filled
129 {
130 auto search = header.ref_dict.find(ref_id_tmp);
131
132 if (search == header.ref_dict.end())
133 {
134 if constexpr(detail::decays_to_ignore_v<ref_seqs_type>) // no reference information given
135 {
137 {
138 throw format_error{"Unknown reference id found in record which is not present in the header."};
139 }
140 else
141 {
// Register the new name first, then key the dictionary on the element stored inside ref_ids()
// (not on ref_id_tmp itself).
142 header.ref_ids().push_back(ref_id_tmp);
143 auto pos = std::ranges::size(header.ref_ids()) - 1;
144 header.ref_dict[header.ref_ids()[pos]] = pos;
145 ref_id = pos;
146 }
147 }
148 else
149 {
150 throw format_error{"Unknown reference id found in record which is not present in the given ids."};
151 }
152 }
153 else
154 {
155 ref_id = search->second;
156 }
157 }
158}
159
// Definition of format_sam_base::transfer_soft_clipping_to.
// NOTE(review): the first line of the signature (original line 165) is missing from this listing; the
// in-class declaration shows `std::vector<cigar> const & cigar_vector` as the first parameter.
//
// Writes the count of a leading soft clip ('S') into sc_begin and the count of a trailing soft clip into
// sc_end. A soft clip is recognised at the outermost position, or at the second-outermost position when the
// outermost element is a hard clip ('H'). An output parameter is left untouched if no soft clip matches.
// The trailing check requires at least two elements, so a vector holding a single 'S' only sets sc_begin.
166 int32_t & sc_begin,
167 int32_t & sc_end) const
168{
169 // Checks if the given index in the cigar vector is a soft clip.
170 auto soft_clipping_at = [&] (size_t const index) { return cigar_vector[index] == 'S'_cigar_operation; };
171 // Checks if the given index in the cigar vector is a hard clip.
172 auto hard_clipping_at = [&] (size_t const index) { return cigar_vector[index] == 'H'_cigar_operation; };
173 // Checks if the given cigar vector has at least min_size many elements.
174 auto vector_size_at_least = [&] (size_t const min_size) { return cigar_vector.size() >= min_size; };
175 // Returns the cigar count of the ith cigar element in the given cigar vector.
176 auto cigar_count_at = [&] (size_t const index) { return get<0>(cigar_vector[index]); };
177
178 // check for soft clipping at the first two positions
179 if (vector_size_at_least(1) && soft_clipping_at(0))
180 sc_begin = cigar_count_at(0);
181 else if (vector_size_at_least(2) && hard_clipping_at(0) && soft_clipping_at(1))
182 sc_begin = cigar_count_at(1);
183
184 // Check for soft clipping at the last two positions. But only if the vector size has at least 2, respectively
185 // 3 elements. Accordingly, if the following arithmetics overflow they are protected by the corresponding
186 // if expressions below.
// (The indices are unsigned, so they wrap around for vectors with fewer than 1 resp. 2 elements; the size
// checks are evaluated first and short-circuit before such an index is used.)
187 auto last_index = cigar_vector.size() - 1;
188 auto second_last_index = last_index - 1;
189
190 if (vector_size_at_least(2) && soft_clipping_at(last_index))
191 sc_end = cigar_count_at(last_index);
192 else if (vector_size_at_least(3) && hard_clipping_at(last_index) && soft_clipping_at(second_last_index))
193 sc_end = cigar_count_at(second_last_index);
194}
195
// Fills the alignment `align` from the parsed cigar information.
//
// get<0>(align) receives the unaligned reference part, get<1>(align) is expected to already hold the read.
// If the read is mapped (rid > -1 and ref_start > -1), the cigar vector is non-empty and the read sequence is
// non-empty, the reference part covers [ref_start, ref_start + ref_length) - taken from ref_seqs[rid] when
// reference sequences are given, otherwise a dummy sequence of length ref_length - and gaps are inserted via
// detail::alignment_from_cigar. Otherwise an empty reference part is assigned.
//
// NOTE(review): original lines 226, 228, 250 and 252 are missing from this listing: the alias `unaligned_t`
// is used but its declaration is not visible, and the two views::repeat_n expressions are cut off before
// their end. Do not treat this text as compilable; consult the upstream header.
206template <typename align_type, typename ref_seqs_type>
207inline void format_sam_base::construct_alignment(align_type & align,
208 std::vector<cigar> & cigar_vector,
209 [[maybe_unused]] int32_t rid,
210 [[maybe_unused]] ref_seqs_type & ref_seqs,
211 [[maybe_unused]] int32_t ref_start,
212 size_t ref_length)
213{
214 if (rid > -1 && ref_start > -1 && // read is mapped
215 !cigar_vector.empty() && // alignment field was not empty
216 !std::ranges::empty(get<1>(align))) // seq field was not empty
217 {
218 if constexpr (!detail::decays_to_ignore_v<ref_seqs_type>)
219 {
// The requested slice must lie inside the reference sequence (checked in debug builds only).
220 assert(static_cast<size_t>(ref_start + ref_length) <= std::ranges::size(ref_seqs[rid]));
221 // copy over unaligned reference sequence part
222 assign_unaligned(get<0>(align), ref_seqs[rid] | views::slice(ref_start, ref_start + ref_length));
223 }
224 else
225 {
// No reference sequences: build a placeholder of ref_length default-constructed letters. The static_assert
// enforces that the alignment's first element has exactly the type of that placeholder.
227 auto dummy_seq = views::repeat_n(std::ranges::range_value_t<unaligned_t>{}, ref_length)
229 static_assert(std::same_as<unaligned_t, decltype(dummy_seq)>,
230 "No reference information was given so the type of the first alignment tuple position"
231 "must have an unaligned sequence type of a dummy sequence ("
232 "views::repeat_n(dna5{}, size_t{}) | "
233 "std::views::transform(detail::access_restrictor_fn{}))");
234
235 assign_unaligned(get<0>(align), dummy_seq); // assign dummy sequence
236 }
237
238 // insert gaps according to the cigar information
239 detail::alignment_from_cigar(align, cigar_vector);
240 }
241 else // not enough information for an alignment, assign an empty view/dummy_sequence
242 {
243 if constexpr (!detail::decays_to_ignore_v<ref_seqs_type>) // reference info given
244 {
245 assert(std::ranges::size(ref_seqs) > 0); // we assume that the given ref info is not empty
// An empty slice of the first reference sequence serves as the "empty" reference part.
246 assign_unaligned(get<0>(align), ref_seqs[0] | views::slice(0, 0));
247 }
248 else
249 {
251 assign_unaligned(get<0>(align), views::repeat_n(std::ranges::range_value_t<unaligned_t>{}, 0)
253 }
254 }
255}
256
266template <typename stream_view_t>
267inline void format_sam_base::read_byte_field(stream_view_t && stream_view, std::byte & byte_target)
268{
269 // unfortunately std::from_chars only accepts char const * so we need a buffer.
270 auto [ignore, end] = std::ranges::copy(stream_view, arithmetic_buffer.data());
271 (void) ignore;
272
273 uint8_t byte{};
274 // std::from_chars cannot directly parse into a std::byte
275 std::from_chars_result res = std::from_chars(arithmetic_buffer.begin(), end, byte, 16);
276
277 if (res.ec == std::errc::invalid_argument || res.ptr != end)
278 throw format_error{std::string("[CORRUPTED SAM FILE] The string '") +
280 "' could not be cast into type uint8_t."};
281
282 if (res.ec == std::errc::result_out_of_range)
283 throw format_error{std::string("[CORRUPTED SAM FILE] Casting '") + std::string(arithmetic_buffer.begin(), end) +
284 "' into type uint8_t would cause an overflow."};
285 byte_target = std::byte{byte};
286}
287
295template <typename stream_view_type, std::ranges::forward_range target_range_type>
296inline void format_sam_base::read_forward_range_field(stream_view_type && stream_view, target_range_type & target)
297{
298 using target_range_value_t = std::ranges::range_value_t<target_range_type>;
299 using begin_iterator_t = std::ranges::iterator_t<stream_view_type>;
300 using end_iterator_t = std::ranges::sentinel_t<stream_view_type>;
301
302 // Note that we need to cache the begin iterator since the stream_view is an input range that may be consuming
303 // and in that case might read `past-the-end` on a second call of std::ranges::begin.
304 if (auto it = std::ranges::begin(stream_view); it != std::ranges::end(stream_view))
305 {
306 // Write to target if field does not represent an empty string, denoted as single '*' character.
307 if (char c = *it; !(++it == std::ranges::end(stream_view) && c == '*'))
308 {
309 target.push_back(seqan3::assign_char_to(c, target_range_value_t{}));
310 std::ranges::copy(std::ranges::subrange<begin_iterator_t, end_iterator_t>{it, std::ranges::end(stream_view)}
311 | views::char_to<target_range_value_t>,
312 std::back_inserter(target));
313 }
314 }
315}
316
327template <typename stream_view_t, arithmetic arithmetic_target_type>
328inline void format_sam_base::read_arithmetic_field(stream_view_t && stream_view, arithmetic_target_type & arithmetic_target)
329{
330 // unfortunately std::from_chars only accepts char const * so we need a buffer.
331 auto [ignore, end] = std::ranges::copy(stream_view, arithmetic_buffer.data());
332 (void) ignore;
333 std::from_chars_result res = std::from_chars(arithmetic_buffer.begin(), end, arithmetic_target);
334
335 if (res.ec == std::errc::invalid_argument || res.ptr != end)
336 throw format_error{std::string("[CORRUPTED SAM FILE] The string '") +
338 "' could not be cast into type " +
339 detail::type_name_as_string<arithmetic_target_type>};
340
341 if (res.ec == std::errc::result_out_of_range)
342 throw format_error{std::string("[CORRUPTED SAM FILE] Casting '") + std::string(arithmetic_buffer.begin(), end) +
343 "' into type " + detail::type_name_as_string<arithmetic_target_type> +
344 " would cause an overflow."};
345}
346
// Reads the SAM header: consumes every leading line of stream_view that starts with '@' and stores the parsed
// @HD, @SQ, @RG, @PG and @CO records in `hdr`.
//
// Throws format_error if a required tag is missing (VN in @HD, SN/LN in @SQ, ID in @RG and @PG), if LN is not
// positive, if a given reference name or length disagrees with the header, or if the record type is unknown.
// Unknown tags inside @HD and @PG are reported on std::cerr and skipped; unknown tags inside @SQ and @RG are
// appended verbatim (tab separated) to the record's free-text string.
//
// NOTE(review): original lines 365, 493, 496, 567 and 599 are missing from this listing. As a result the
// declarations of the parameter `hdr` and of the locals `info` (@SQ) and `tmp` (@RG, @PG) are not visible
// below; consult the upstream header before editing.
363template <typename stream_view_type, typename ref_ids_type, typename ref_seqs_type>
364inline void format_sam_base::read_header(stream_view_type && stream_view,
366 ref_seqs_type & /*ref_id_to_pos_map*/)
367{
368 auto it = std::ranges::begin(stream_view);
369 auto end = std::ranges::end(stream_view);
370 std::vector<char> string_buffer{};
371
// Packs a two-letter tag into one integer so that tags can be used as case labels of a switch.
372 auto make_tag = [] (uint8_t char1, uint8_t char2) constexpr
373 {
374 return static_cast<uint16_t>(char1) | (static_cast<uint16_t>(char2) << CHAR_BIT);
375 };
376
// Holds the two letters of the most recently parsed tag, for error reporting and verbatim copying.
377 std::array<char, 2> raw_tag{};
378
// Consumes two characters from the stream, remembers them in raw_tag and returns the packed tag.
// NOTE(review): `it` is dereferenced without comparing against `end` - a truncated header line is not guarded here.
379 auto parse_and_make_tag = [&] ()
380 {
381 raw_tag[0] = *it;
382 ++it;
383 raw_tag[1] = *it;
384 ++it;
385 return make_tag(raw_tag[0], raw_tag[1]);
386 };
387
// Replaces the content of string_buffer with all characters up to (excluding) the first one matching predicate.
388 auto take_until_predicate = [&it, &string_buffer] (auto const & predicate)
389 {
390 string_buffer.clear();
391 while (!predicate(*it))
392 {
393 string_buffer.push_back(*it);
394 ++it;
395 }
396 };
397
// Advances the stream to the first character matching predicate without storing anything.
398 auto skip_until_predicate = [&it] (auto const & predicate)
399 {
400 while (!predicate(*it))
401 ++it;
402 };
403
// Skips to the ':' that follows a tag name and copies the tag value (up to tab/newline) into string_buffer.
404 auto copy_next_tag_value_into_buffer = [&] ()
405 {
406 skip_until_predicate(is_char<':'>);
407 ++it; // skip :
408 take_until_predicate(is_char<'\t'> || is_char<'\n'>);
409 };
410
411 // Some tags are not parsed individually. Instead, these are simply copied into a std::string.
412 // Multiple tags must be separated by a `\t`, hence we prepend a tab to the string, except the first time.
413 // Alternatively, we could always append a `\t`, but this would have the side effect that we might need to trim a
414 // trailing tab after parsing all tags via `pop_back()`.
415 // Unfortunately, we do not know when we are parsing the last tag (and in this case just not append a tab),
416 // because even though we can check if the line ends in a `\n`, it is not guaranteed that the last tag of the
417 // line is passed to this lambda. For example, the line might end with a tag that is properly parsed, such as `ID`.
418 auto parse_and_append_unhandled_tag_to_string = [&] (std::string & value, std::array<char, 2> raw_tag)
419 {
420 take_until_predicate(is_char<'\t'> || is_char<'\n'>);
421 if (!value.empty())
422 value.push_back('\t');
423 value.push_back(raw_tag[0]);
424 value.push_back(raw_tag[1]);
425 read_forward_range_field(string_buffer, value);
426 };
427
// Reports an unknown tag of the given record type on std::cerr (the captured `it` is not used in the body).
428 auto print_cerr_of_unspported_tag = [&it] (char const * const header_tag, std::array<char, 2> raw_tag)
429 {
430 std::cerr << "Unsupported SAM header tag in @" << header_tag << ": " << raw_tag[0] << raw_tag[1] << '\n';
431 };
432
// One iteration per header line; the loop stops at the first line that does not start with '@'.
433 while (it != end && is_char<'@'>(*it))
434 {
435 ++it; // skip @
436
437 switch (parse_and_make_tag())
438 {
439 case make_tag('H', 'D'): // HD (header) tag
440 {
441 // All tags can appear in any order, VN is the only required tag
442 while (is_char<'\t'>(*it))
443 {
444 ++it; // skip tab
// Points to the header member the current tag is stored in; stays nullptr for unsupported tags.
445 std::string * header_entry{nullptr};
446
447 switch (parse_and_make_tag())
448 {
449 case make_tag('V', 'N'): // parse required VN (version) tag
450 {
451 header_entry = std::addressof(hdr.format_version);
452 break;
453 }
454 case make_tag('S', 'O'): // SO (sorting) tag
455 {
456 header_entry = std::addressof(hdr.sorting);
457 break;
458 }
459 case make_tag('S', 'S'): // SS (sub-order) tag
460 {
461 header_entry = std::addressof(hdr.subsorting);
462 break;
463 }
464 case make_tag('G', 'O'): // GO (grouping) tag
465 {
466 header_entry = std::addressof(hdr.grouping);
467 break;
468 }
469 default: // unsupported header tag
470 {
471 print_cerr_of_unspported_tag("HD", raw_tag);
472 }
473 }
474
475 if (header_entry != nullptr)
476 {
477 copy_next_tag_value_into_buffer();
478 read_forward_range_field(string_buffer, *header_entry);
479 }
480 else
481 skip_until_predicate(is_char<'\t'> || is_char<'\n'>);
482 }
483 ++it; // skip newline
484
485 if (hdr.format_version.empty())
486 throw format_error{std::string{"The required VN tag in @HD is missing."}};
487
488 break;
489 }
490
491 case make_tag('S', 'Q'): // SQ (sequence dictionary) tag
492 {
494 std::ranges::range_value_t<decltype(hdr.ref_ids())> id;
// Optional so that a missing LN tag can be told apart from a parsed value.
495 std::optional<int32_t> sequence_length{};
497
498 // All tags can appear in any order, SN and LN are required tags
499 while (is_char<'\t'>(*it))
500 {
501 ++it; // skip tab
502
503 switch (parse_and_make_tag())
504 {
505 case make_tag('S', 'N'): // parse required SN (sequence name) tag
506 {
507 copy_next_tag_value_into_buffer();
508 read_forward_range_field(string_buffer, id);
509 break;
510 }
511 case make_tag('L', 'N'): // parse required LN (length) tag
512 {
513 int32_t sequence_length_tmp{};
514 copy_next_tag_value_into_buffer();
515 read_arithmetic_field(string_buffer, sequence_length_tmp);
516 sequence_length = sequence_length_tmp;
517 break;
518 }
519 default: // Any other tag
520 {
521 parse_and_append_unhandled_tag_to_string(get<1>(info), raw_tag);
522 }
523 }
524 }
525 ++it; // skip newline
526
527 if (id.empty())
528 throw format_error{std::string{"The required SN tag in @SQ is missing."}};
529 if (!sequence_length.has_value())
530 throw format_error{std::string{"The required LN tag in @SQ is missing."}};
531 if (sequence_length.value() <= 0)
532 throw format_error{std::string{"The value of LN in @SQ must be positive."}};
533
534 get<0>(info) = sequence_length.value();
535 // If reference information was given, the ids exist and we can fill ref_dict directly.
536 // If not, we need to update the ids first and fill the reference dictionary afterwards.
537 if constexpr (!detail::decays_to_ignore_v<ref_seqs_type>) // reference information given
538 {
539 auto id_it = hdr.ref_dict.find(id);
540
541 if (id_it == hdr.ref_dict.end())
542 throw format_error{detail::to_string("Unknown reference name '", id, "' found in SAM header ",
543 "(header.ref_ids(): ", hdr.ref_ids(), ").")};
544
545 auto & given_ref_info = hdr.ref_id_info[id_it->second];
546
547 if (std::get<0>(given_ref_info) != std::get<0>(info))
548 throw format_error{"Provided and header-based reference length differ."};
549
550 hdr.ref_id_info[id_it->second] = std::move(info);
551 }
552 else
553 {
// NOTE(review): the condition is negated although the message demands std::deque, and
// decltype(hdr.ref_ids()) is a reference type - this assertion looks inverted / ineffective; confirm.
554 static_assert(!detail::is_type_specialisation_of_v<decltype(hdr.ref_ids()), std::deque>,
555 "The range over reference ids must be of type std::deque such that pointers are not "
556 "invalidated.");
557
// Append id and info, then key the dictionary on the element stored inside ref_ids().
558 hdr.ref_ids().push_back(id);
559 hdr.ref_id_info.push_back(info);
560 hdr.ref_dict[(hdr.ref_ids())[(hdr.ref_ids()).size() - 1]] = (hdr.ref_ids()).size() - 1;
561 }
562 break;
563 }
564
565 case make_tag('R', 'G'): // RG (read group) tag
566 {
568
569 // All tags can appear in any order, ID is the only required tag
570 while (is_char<'\t'>(*it))
571 {
572 ++it; // skip tab
573
574 switch (parse_and_make_tag())
575 {
576 case make_tag('I', 'D'): // parse required ID tag
577 {
578 copy_next_tag_value_into_buffer();
579 read_forward_range_field(string_buffer, get<0>(tmp));
580 break;
581 }
582 default: // Any other tag
583 {
584 parse_and_append_unhandled_tag_to_string(get<1>(tmp), raw_tag);
585 }
586 }
587 }
588 ++it; // skip newline
589
590 if (get<0>(tmp).empty())
591 throw format_error{std::string{"The required ID tag in @RG is missing."}};
592
593 hdr.read_groups.emplace_back(std::move(tmp));
594 break;
595 }
596
597 case make_tag('P', 'G'): // PG (program) tag
598 {
600
601 // All tags can appear in any order, ID is the only required tag
602 while (is_char<'\t'>(*it))
603 {
604 ++it; // skip tab
// Points to the member of tmp the current tag is stored in; stays nullptr for unsupported tags.
605 std::string * program_info_entry{nullptr};
606
607 switch (parse_and_make_tag())
608 {
609 case make_tag('I', 'D'): // read required ID tag
610 {
611 program_info_entry = std::addressof(tmp.id);
612 break;
613 }
614 case make_tag('P', 'N'): // PN (program name) tag
615 {
616 program_info_entry = std::addressof(tmp.name);
617 break;
618 }
619 case make_tag('P', 'P'): // PP (previous program) tag
620 {
621 program_info_entry = std::addressof(tmp.previous);
622 break;
623 }
624 case make_tag('C', 'L'): // CL (command line) tag
625 {
626 program_info_entry = std::addressof(tmp.command_line_call);
627 break;
628 }
629 case make_tag('D', 'S'): // DS (description) tag
630 {
631 program_info_entry = std::addressof(tmp.description);
632 break;
633 }
634 case make_tag('V', 'N'): // VN (version) tag
635 {
636 program_info_entry = std::addressof(tmp.version);
637 break;
638 }
639 default: // unsupported header tag
640 {
641 print_cerr_of_unspported_tag("PG", raw_tag);
642 }
643 }
644
645 if (program_info_entry != nullptr)
646 {
647 copy_next_tag_value_into_buffer();
648 read_forward_range_field(string_buffer, *program_info_entry);
649 }
650 else
651 skip_until_predicate(is_char<'\t'> || is_char<'\n'>);
652 }
653 ++it; // skip newline
654
655 if (tmp.id.empty())
656 throw format_error{std::string{"The required ID tag in @PG is missing."}};
657
658 hdr.program_infos.emplace_back(std::move(tmp));
659 break;
660 }
661
662 case make_tag('C', 'O'): // CO (comment) tag
663 {
// The rest of the line after the separating tab is stored as one comment string.
664 ++it; // skip tab
665 std::string tmp;
666 take_until_predicate(is_char<'\n'>);
667 read_forward_range_field(string_buffer, tmp);
668 ++it; // skip newline
669 hdr.comments.emplace_back(std::move(tmp));
670 break;
671 }
672
673 default:
// NOTE(review): at this point the two tag letters were already consumed, so *it is the character after
// the tag rather than the tag itself; raw_tag holds the offending letters - confirm the intended message.
674 throw format_error{std::string{"Illegal SAM header tag starting with:"} + *it};
675 }
676 }
677}
678
// Writes the SAM header to `stream`: one @HD line, one @SQ line per reference, followed by the @RG, @PG and
// @CO lines stored in `header`. Every line is terminated via detail::write_eol, which honours
// options.add_carriage_return.
//
// Before anything is written, header.sorting and header.grouping are validated; a non-empty value outside the
// allowed set raises format_error. The version written after "VN:" is the member format_version, not
// header.format_version.
//
// NOTE(review): the last line of the signature (original line 698) is missing from this listing; the body
// refers to that parameter as `header`.
695template <typename stream_t, typename ref_ids_type>
696inline void format_sam_base::write_header(stream_t & stream,
697 sam_file_output_options const & options,
699{
700 // -----------------------------------------------------------------
701 // Check Header
702 // -----------------------------------------------------------------
703
704 // (@HD) Check header line
705 // The format version string will be taken from the local member variable
706 if (!header.sorting.empty() &&
707 !(header.sorting == "unknown" ||
708 header.sorting == "unsorted" ||
709 header.sorting == "queryname" ||
710 header.sorting == "coordinate" ))
711 throw format_error{"SAM format error: The header.sorting member must be "
712 "one of [unknown, unsorted, queryname, coordinate]."};
713
714 if (!header.grouping.empty() &&
715 !(header.grouping == "none" ||
716 header.grouping == "query" ||
717 header.grouping == "reference"))
718 throw format_error{"SAM format error: The header.grouping member must be "
719 "one of [none, query, reference]."};
720
721 // (@SQ) Check Reference Sequence Dictionary lines
722
723 // TODO
724
725 // - sorting order be one of ...
726 // - grouping can be one of ...
727 // - reference names must be unique
728 // - ids of read groups must be unique
729 // - program ids need to be unique
730 // many more small semantic things, like fits REGEX
731
732 // -----------------------------------------------------------------
733 // Write Header
734 // -----------------------------------------------------------------
// Both `stream <<` and this iterator are used below to write to the same stream.
735 std::ostreambuf_iterator stream_it{stream};
736
737 // (@HD) Write header line [required].
738 stream << "@HD\tVN:";
739 std::ranges::copy(format_version, stream_it);
740
// Optional @HD tags are only written when the corresponding header member is non-empty.
741 if (!header.sorting.empty())
742 stream << "\tSO:" << header.sorting;
743
744 if (!header.subsorting.empty())
745 stream << "\tSS:" << header.subsorting;
746
747 if (!header.grouping.empty())
748 stream << "\tGO:" << header.grouping;
749
750 detail::write_eol(stream_it, options.add_carriage_return);
751
752 // (@SQ) Write Reference Sequence Dictionary lines [required].
753 for (auto const & [ref_name, ref_info] : views::zip(header.ref_ids(), header.ref_id_info))
754 {
755 stream << "@SQ\tSN:";
756
757 std::ranges::copy(ref_name, stream_it);
758
// get<0>(ref_info) is the sequence length, get<1>(ref_info) the verbatim string of additional tags.
759 stream << "\tLN:" << get<0>(ref_info);
760
761 if (!get<1>(ref_info).empty())
762 stream << "\t" << get<1>(ref_info);
763
764 detail::write_eol(stream_it, options.add_carriage_return);
765 }
766
767 // Write read group (@RG) lines if specified.
768 for (auto const & read_group : header.read_groups)
769 {
// get<0>(read_group) is the ID, get<1>(read_group) the verbatim string of additional tags.
770 stream << "@RG"
771 << "\tID:" << get<0>(read_group);
772
773 if (!get<1>(read_group).empty())
774 stream << "\t" << get<1>(read_group);
775
776 detail::write_eol(stream_it, options.add_carriage_return);
777 }
778
779 // Write program (@PG) lines if specified.
780 for (auto const & program : header.program_infos)
781 {
// The ID is always written; every other @PG tag only when its value is non-empty.
782 stream << "@PG"
783 << "\tID:" << program.id;
784
785 if (!program.name.empty())
786 stream << "\tPN:" << program.name;
787
788 if (!program.command_line_call.empty())
789 stream << "\tCL:" << program.command_line_call;
790
791 if (!program.previous.empty())
792 stream << "\tPP:" << program.previous;
793
794 if (!program.description.empty())
795 stream << "\tDS:" << program.description;
796
797 if (!program.version.empty())
798 stream << "\tVN:" << program.version;
799
800 detail::write_eol(stream_it, options.add_carriage_return);
801 }
802
803 // Write comment (@CO) lines if specified.
804 for (auto const & comment : header.comments)
805 {
806 stream << "@CO\t" << comment;
807 detail::write_eol(stream_it, options.add_carriage_return);
808 }
809}
810
811} // namespace seqan3::detail
T addressof(T... args)
T back_inserter(T... args)
T begin(T... args)
Provides seqan3::views::char_to.
The alignment base format.
Definition: format_sam_base.hpp:44
format_sam_base()=default
Defaulted.
void check_and_assign_ref_id(ref_id_type &ref_id, ref_id_tmp_type &ref_id_tmp, header_type &header, ref_seqs_type &)
Checks for known reference ids or adds a new reference id and assigns a reference id to ref_id.
Definition: format_sam_base.hpp:123
void read_arithmetic_field(stream_view_t &&stream_view, arithmetic_target_type &arithmetic_target)
Reads arithmetic fields using std::from_chars.
Definition: format_sam_base.hpp:328
format_sam_base(format_sam_base &&)=default
Defaulted.
std::array< char, 316 > arithmetic_buffer
A buffer used when parsing arithmetic values with std::from_chars.
Definition: format_sam_base.hpp:62
void write_header(stream_t &stream, sam_file_output_options const &options, sam_file_header< ref_ids_type > &header)
Writes the SAM header.
Definition: format_sam_base.hpp:696
void construct_alignment(align_type &align, std::vector< cigar > &cigar_vector, int32_t rid, ref_seqs_type &ref_seqs, int32_t ref_start, size_t ref_length)
Construct the field::alignment depending on the given information.
Definition: format_sam_base.hpp:207
bool ref_info_present_in_header
Tracks whether reference information (@SQ tag) was found in the SAM header.
Definition: format_sam_base.hpp:68
void transfer_soft_clipping_to(std::vector< cigar > const &cigar_vector, int32_t &sc_begin, int32_t &sc_end) const
Transfer soft clipping information from the cigar_vector to sc_begin and sc_end.
Definition: format_sam_base.hpp:165
format_sam_base(format_sam_base const &)=default
Defaulted.
void read_forward_range_field(stream_view_type &&stream_view, target_range_type &target)
Reads a range by copying from stream_view to target, converting values with seqan3::views::char_to.
Definition: format_sam_base.hpp:296
bool header_was_written
A variable that tracks whether the content of header has been written or not.
Definition: format_sam_base.hpp:65
void read_header(stream_view_type &&stream_view, sam_file_header< ref_ids_type > &hdr, ref_seqs_type &)
Reads the SAM header.
Definition: format_sam_base.hpp:364
format_sam_base & operator=(format_sam_base const &)=default
Defaulted.
~format_sam_base()=default
Defaulted.
void read_byte_field(stream_view_t &&stream_view, std::byte &byte_target)
Reads std::byte fields using std::from_chars.
Definition: format_sam_base.hpp:267
format_sam_base & operator=(format_sam_base &&)=default
Defaulted.
The format that prints the version to std::cout.
Definition: format_help.hpp:431
Stores the header information of alignment files.
Definition: header.hpp:34
std::vector< std::pair< std::string, std::string > > read_groups
The Read Group Dictionary (used by the SAM/BAM format).
Definition: header.hpp:214
std::string sorting
The sorting of the file. SAM: [unknown, unsorted, queryname, coordinate].
Definition: header.hpp:79
ref_ids_type & ref_ids()
The range of reference ids.
Definition: header.hpp:139
std::unordered_map< key_type, int32_t, key_hasher, detail::view_equality_fn > ref_dict
The mapping of reference id to position in the ref_ids() range and the ref_id_info range.
Definition: header.hpp:178
std::vector< std::tuple< int32_t, std::string > > ref_id_info
The reference information. (used by the SAM/BAM format)
Definition: header.hpp:175
std::string format_version
The file format version. Note: this is overwritten by our formats on output.
Definition: header.hpp:78
std::vector< std::string > comments
The list of comments.
Definition: header.hpp:85
std::string grouping
The grouping of the file. SAM: [none, query, reference].
Definition: header.hpp:81
std::string subsorting
The sub-sorting of the file. SAM: [unknown, unsorted, queryname, coordinate](:[A-Za-z0-9_-]+)+.
Definition: header.hpp:80
std::vector< program_info_t > program_infos
The list of program information.
Definition: header.hpp:83
Provides various utility functions.
T data(T... args)
T emplace_back(T... args)
T empty(T... args)
T from_chars(T... args)
constexpr auto assign_char_to
Assign a character to an alphabet object.
Definition: concept.hpp:526
void alignment_from_cigar(alignment_type &alignment, std::vector< cigar > const &cigar_vector)
Transforms a std::vector of operation-count pairs (representing the cigar string).
Definition: cigar.hpp:381
constexpr void write_eol(it_t &it, bool const add_cr)
Write "\n" or "\r\n" to the stream iterator, depending on arguments.
Definition: misc.hpp:49
@ comment
Comment field of arbitrary content, usually a string.
@ ref_id
The identifier of the (reference) sequence that seqan3::field::seq was aligned to.
decltype(detail::transform< trait_t >(list_t{})) transform
Apply a transformation trait to every type in the list and return a seqan3::type_list of the results.
Definition: traits.hpp:495
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:151
constexpr auto slice
A view adaptor that returns a half-open interval on the underlying range.
Definition: slice.hpp:183
constexpr auto zip
A zip view.
Definition: zip.hpp:29
constexpr auto repeat_n
A view factory that repeats a given value n times.
Definition: repeat_n.hpp:91
Provides the seqan3::sam_file_header class.
Provides various utility functions.
Auxiliary functions for the alignment IO.
The internal SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
std::string to_string(value_type &&...values)
Streams all parameters via the seqan3::debug_stream and returns a concatenated string.
Definition: to_string.hpp:29
T push_back(T... args)
Provides seqan3::debug_stream and related types.
The <ranges> header from C++20's standard library.
Provides seqan3::views::repeat_n.
Provides seqan3::sam_file_output_format and auxiliary classes.
T size(T... args)
Provides seqan3::views::slice.
A functor that always throws when calling operator() (needed for the alignment "dummy" sequence).
Definition: cigar.hpp:434
Object storing information for a search (of a search scheme).
Definition: search_scheme_precomputed.hpp:28
Thrown if information given to output format didn't match expectations.
Definition: exception.hpp:91
Stores information of the program/tool that was used to create the file.
Definition: header.hpp:69
The options type defines various option members that influence the behavior of all or some formats.
Definition: output_options.hpp:26
bool add_carriage_return
The default plain text line-ending is "\n", but on Windows an additional carriage return is recommend...
Definition: output_options.hpp:30
Provides traits to inspect some information of a type, for example its name.
Provides seqan3::views::zip.