30#include <seqan3/io/detail/record.hpp>
127 requires std::same_as<typename t::ref_sequences, ref_info_not_given>
129 requires alphabet<std::ranges::range_reference_t<
130 std::ranges::range_reference_t<typename t::ref_sequences>>>;
135 && (!std::same_as<typename t::ref_sequences, ref_info_not_given>
137 std::ranges::range_reference_t<std::ranges::range_reference_t<typename t::ref_ids>>>);
138 requires std::ranges::forward_range<std::ranges::range_reference_t<typename t::ref_ids>>;
139 requires std::ranges::forward_range<typename t::ref_ids>;
169template <
typename ref_sequences_t = ref_info_not_given,
typename ref_
ids_t = std::deque<std::
string>>
184 template <
typename _sequence_alphabet>
188 template <
typename _
id_alphabet>
195 template <
typename _quality_alphabet>
225template <sam_file_input_traits traits_type_ = sam_file_input_default_traits<>,
226 detail::fields_specialisation selected_field_
ids_ = fields<field::seq,
237 detail::type_list_of_sam_file_input_formats val
id_formats_ = type_list<format_sam, format_bam>>
257 using dummy_ref_type =
decltype(
views::repeat_n(
typename traits_type::sequence_alphabet{},
size_t{})
258 | std::views::transform(detail::access_restrictor_fn{}));
261 using ref_sequence_unsliced_type = detail::lazy_conditional_t<
262 std::ranges::range<typename traits_type::ref_sequences const>,
263 detail::lazy<std::ranges::range_reference_t, typename traits_type::ref_sequences const>,
267 using ref_sequence_sliced_type =
decltype(std::declval<ref_sequence_unsliced_type>() |
views::slice(0, 0));
278 using id_type =
typename traits_type::template id_container<char>;
287 ref_sequence_sliced_type>;
306 using quality_type =
typename traits_type::template quality_container<typename traits_type::quality_alphabet>;
361 "The seqan3::field::alignment was removed from the allowed fields for seqan3::sam_file_input. "
362 "Only seqan3::field::cigar is supported. Please see seqan3::alignment_from_cigar on how to get an "
363 "alignment from the cigar information.");
366 "The field::offset is deprecated. Please access field::cigar and retrieve the soft clipping (S) "
367 "value at the front of the CIGAR string (offset = 0 if there is no soft clipping at the front).");
372 for (
field f : selected_field_ids::as_array)
373 if (!field_ids::contains(f))
377 "You selected a field that is not valid for SAM files, please refer to the documentation "
378 "of sam_file_input::field_ids for the accepted values.");
400 using iterator = detail::in_file_iterator<sam_file_input>;
442 primary_stream{new
std::ifstream{}, stream_deleter_default}
444 init_by_filename(std::move(filename));
466 template <input_stream stream_t, sam_file_input_format file_format>
467 requires std::same_as<typename std::remove_reference_t<stream_t>::char_type,
stream_char_type>
469 file_format
const & SEQAN3_DOXYGEN_ONLY(format_tag),
471 primary_stream{&stream, stream_deleter_noop}
473 init_by_format<file_format>();
477 template <input_stream stream_t, sam_file_input_format file_format>
478 requires std::same_as<typename std::remove_reference_t<stream_t>::char_type,
stream_char_type>
480 file_format
const & SEQAN3_DOXYGEN_ONLY(format_tag),
482 primary_stream{new stream_t{
std::move(stream)}, stream_deleter_default}
484 init_by_format<file_format>();
516 typename traits_type::ref_ids & ref_ids,
517 typename traits_type::ref_sequences & ref_sequences,
519 primary_stream{new
std::ifstream{}, stream_deleter_default}
522 set_references(ref_ids, ref_sequences);
524 init_by_filename(std::move(filename));
557 template <input_stream stream_t, sam_file_input_format file_format>
559 typename traits_type::ref_ids & ref_ids,
560 typename traits_type::ref_sequences & ref_sequences,
561 file_format
const & SEQAN3_DOXYGEN_ONLY(format_tag),
563 primary_stream{&stream, stream_deleter_noop}
566 set_references(ref_ids, ref_sequences);
568 init_by_format<file_format>();
572 template <input_stream stream_t, sam_file_input_format file_format>
574 typename traits_type::ref_ids & ref_ids,
575 typename traits_type::ref_sequences & ref_sequences,
576 file_format
const & SEQAN3_DOXYGEN_ONLY(format_tag),
578 primary_stream{new stream_t{
std::move(stream)}, stream_deleter_default}
581 set_references(ref_ids, ref_sequences);
583 init_by_format<file_format>();
589 typename traits_type::ref_ids &&,
590 typename traits_type::ref_sequences &&,
593 template <input_stream stream_t, sam_file_input_format file_format>
595 typename traits_type::ref_ids &&,
596 typename traits_type::ref_sequences &&,
625 if (!first_record_was_read)
628 first_record_was_read =
true;
699 if (!first_record_was_read)
702 first_record_was_read =
true;
714 primary_stream->rdbuf()->pubsetbuf(stream_buffer.
data(), stream_buffer.
size());
716 ->open(filename, std::ios_base::in | std::ios::binary);
718 if (!primary_stream->good())
721 secondary_stream = detail::make_secondary_istream(*primary_stream, filename);
722 detail::set_format(format, filename);
726 template <
typename format_type>
727 void init_by_format()
729 static_assert(list_traits::contains<format_type, valid_formats>,
730 "You selected a format that is not in the valid_formats of this file.");
732 format = detail::sam_file_input_format_exposer<format_type>{};
733 secondary_stream = detail::make_secondary_istream(*primary_stream);
766 stream_ptr_t primary_stream{
nullptr, stream_deleter_noop};
768 stream_ptr_t secondary_stream{
nullptr, stream_deleter_noop};
771 bool first_record_was_read{
false};
776 using format_type =
typename detail::variant_from_tags<valid_formats, detail::sam_file_input_format_exposer>::type;
786 typename traits_type::ref_sequences
const * reference_sequences_ptr{
nullptr};
800 template <std::ranges::forward_range ref_sequences_t>
801 void set_references(
typename traits_type::ref_ids & ref_ids, ref_sequences_t && ref_sequences)
803 assert(std::ranges::distance(ref_ids) == std::ranges::distance(ref_sequences));
806 reference_sequences_ptr = &ref_sequences;
809 for (int32_t idx = 0; idx < std::ranges::distance(ref_ids); ++idx)
811 header_ptr->ref_id_info.emplace_back(std::ranges::distance(ref_sequences[idx]),
"");
813 if constexpr (std::ranges::contiguous_range<std::ranges::range_reference_t<typename traits_type::ref_ids>>
814 && std::ranges::sized_range<std::ranges::range_reference_t<typename traits_type::ref_ids>>
815 && std::ranges::borrowed_range<std::ranges::range_reference_t<typename traits_type::ref_ids>>)
817 auto &&
id = header_ptr->ref_ids()[idx];
818 header_ptr->ref_dict[
std::span{std::ranges::data(
id), std::ranges::size(
id)}] = idx;
822 header_ptr->ref_dict[header_ptr->ref_ids()[idx]] = idx;
829 void read_next_record()
832 record_buffer.clear();
833 detail::get_or_ignore<field::header_ptr>(record_buffer) = header_ptr.get();
843 auto call_read_func = [
this](
auto & ref_seq_info)
848 f.read_alignment_record(*secondary_stream,
853 detail::get_or_ignore<field::seq>(record_buffer),
854 detail::get_or_ignore<field::qual>(record_buffer),
855 detail::get_or_ignore<field::id>(record_buffer),
856 detail::get_or_ignore<field::ref_seq>(record_buffer),
857 detail::get_or_ignore<field::ref_id>(record_buffer),
858 detail::get_or_ignore<field::ref_offset>(record_buffer),
859 detail::get_or_ignore<field::cigar>(record_buffer),
860 detail::get_or_ignore<field::flag>(record_buffer),
861 detail::get_or_ignore<field::mapq>(record_buffer),
862 detail::get_or_ignore<field::mate>(record_buffer),
863 detail::get_or_ignore<field::tags>(record_buffer),
864 detail::get_or_ignore<field::evalue>(record_buffer),
865 detail::get_or_ignore<field::bit_score>(record_buffer));
870 assert(!
format.valueless_by_exception());
872 if constexpr (!std::same_as<typename traits_type::ref_sequences, ref_info_not_given>)
873 call_read_func(*reference_sequences_ptr);
875 call_read_func(std::ignore);
887template <input_stream stream_type, sam_file_input_format file_format, detail::fields_specialisation selected_field_
ids>
894template <input_stream stream_type, sam_file_input_format file_format, detail::fields_specialisation selected_field_
ids>
901template <input_stream stream_type, sam_file_input_format file_format>
908template <input_stream stream_type, sam_file_input_format file_format>
915template <std::ranges::forward_range ref_ids_t,
916 std::ranges::forward_range ref_sequences_t,
925template <std::ranges::forward_range ref_
ids_t, std::ranges::forward_range ref_sequences_t>
932template <input_stream stream_type,
933 std::ranges::forward_range ref_ids_t,
934 std::ranges::forward_range ref_sequences_t,
944template <input_stream stream_type,
945 std::ranges::forward_range ref_ids_t,
946 std::ranges::forward_range ref_sequences_t,
956template <input_stream stream_type,
957 std::ranges::forward_range ref_ids_t,
958 std::ranges::forward_range ref_sequences_t,
966template <input_stream stream_type,
967 std::ranges::forward_range ref_ids_t,
968 std::ranges::forward_range ref_sequences_t,
Provides seqan3::aa27, container aliases and string literals.
Provides the seqan3::cigar alphabet.
Provides alphabet adaptations for standard char types.
The 15 letter DNA alphabet, containing all IUPAC symbols minus the gap.
Definition dna15.hpp:48
The five letter DNA alphabet of A,C,G,T and the unknown character N.
Definition dna5.hpp:48
Quality type for traditional Sanger and modern Illumina Phred scores.
Definition phred42.hpp:44
The SAM tag dictionary class that stores all optional SAM fields.
Definition sam_tag_dictionary.hpp:327
Provides seqan3::dna15, container aliases and string literals.
Provides seqan3::dna5, container aliases and string literals.
sam_flag
An enum flag that describes the properties of an aligned read (given as a SAM record).
Definition sam_flag.hpp:73
field
An enumerator for the fields used in file formats.
Definition record.hpp:60
@ flag
The alignment flag (bit information), uint16_t value.
@ ref_offset
Sequence (seqan3::field::ref_seq) relative start position (0-based), unsigned value.
@ alignment
The (pairwise) alignment stored in an object that models seqan3::detail::pairwise_alignment.
@ cigar
The cigar vector (std::vector<seqan3::cigar>) representing the alignment in SAM/BAM format.
@ mapq
The mapping quality of the seqan3::field::seq alignment, usually a Phred-scaled score.
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
@ mate
The mate pair information given as a std::tuple of reference name, offset and template length.
@ header_ptr
A pointer to the seqan3::sam_file_header object storing header information.
@ ref_id
The identifier of the (reference) sequence that seqan3::field::seq was aligned to.
@ id
The identifier, usually a string.
@ tags
The optional tags in the SAM format, stored in a dictionary.
@ seq
The "sequence", usually a range of nucleotides or amino acids.
@ qual
The qualities, usually in Phred score notation.
constexpr auto slice
A view adaptor that returns a half-open interval on the underlying range.
Definition slice.hpp:175
constexpr auto repeat_n
A view factory that repeats a given value n times.
Definition repeat_n.hpp:88
Provides the seqan3::detail::in_file_iterator class template.
The generic alphabet concept that covers most data types used in ranges.
Checks whether `from` can be explicitly converted to `to`.
A more refined container concept than seqan3::container.
Refines seqan3::alphabet and adds assignability.
A concept that indicates whether a writable alphabet represents quality scores.
Provides exceptions used in the I/O module.
The main SeqAn3 namespace.
Definition aligned_sequence_concept.hpp:26
SeqAn specific customisations in the standard namespace.
Provides seqan3::phred42 quality scores.
Provides quality alphabet composites.
Provides seqan3::views::repeat_n.
Provides seqan3::sam_record.
Provides helper data structures for the seqan3::sam_file_output.
Provides seqan3::views::slice.
A class template that holds a choice of seqan3::field.
Definition record.hpp:125
Thrown if there is an unspecified filesystem or stream error while opening, e.g. permission problem.
Definition io/exception.hpp:36
Type that contains multiple types.
Definition type_list.hpp:26
Provides traits for seqan3::type_list.
Provides seqan3::tuple_like.