Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use ksw2 to align soft-clipped ends from ungapped alignment #382

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
Previous commit
Next commit
Turn the ref parameter of some functions into string_view
  • Loading branch information
marcelm committed Feb 22, 2024
commit b89e3d78b43c86a107299c3f4faec54ee17d1cf1
10 changes: 5 additions & 5 deletions src/aligner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#include "ksw2/ksw2.h"
#include "aligner.hpp"

AlignmentInfo Aligner::align(const std::string &query, const std::string &ref) const {
AlignmentInfo Aligner::align(const std::string& query, const std::string_view ref) const {
m_align_calls++;
AlignmentInfo aln;
int32_t maskLen = query.length() / 2;
Expand All @@ -30,8 +30,8 @@ AlignmentInfo Aligner::align(const std::string &query, const std::string &ref) c

StripedSmithWaterman::Alignment alignment_ssw;

// query must be NULL-terminated
auto flag = ssw_aligner.Align(query.c_str(), ref.c_str(), ref.size(), filter, &alignment_ssw, maskLen);
// only query must be NULL-terminated
auto flag = ssw_aligner.Align(query.c_str(), ref.begin(), ref.size(), filter, &alignment_ssw, maskLen);
if (flag != 0) {
aln.edit_distance = 100000;
aln.ref_start = 0;
Expand Down Expand Up @@ -121,7 +121,7 @@ AlignmentInfo Aligner::align(const std::string &query, const std::string &ref) c
* of the query, once for each end.
*/
std::tuple<size_t, size_t, int> highest_scoring_segment(
const std::string& query, const std::string& ref, int match, int mismatch, int end_bonus
const std::string& query, const std::string_view ref, int match, int mismatch, int end_bonus
) {
size_t n = query.length();

Expand Down Expand Up @@ -156,7 +156,7 @@ std::tuple<size_t, size_t, int> highest_scoring_segment(
}

AlignmentInfo hamming_align(
const std::string &query, const std::string &ref, int match, int mismatch, int end_bonus
const std::string &query, const std::string_view ref, int match, int mismatch, int end_bonus
) {
AlignmentInfo aln;
if (query.length() != ref.length()) {
Expand Down
9 changes: 5 additions & 4 deletions src/aligner.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,9 @@ struct Aligner {
ksw_gen_simple_mat(ksw_matrix_m, ksw_matrix, parameters.match, -parameters.mismatch);
}

AlignmentInfo align(const std::string &query, const std::string &ref) const;
AlignmentInfo align(const std::string& query, const std::string_view ref) const;
AlignmentInfo ksw_extend(const std::string& query, const std::string& ref, bool right_align) const;

AlignmentParameters parameters;

unsigned calls_count() {
Expand All @@ -55,7 +56,7 @@ struct Aligner {
int8_t ksw_matrix[25];
};

inline int hamming_distance(const std::string &s, const std::string &t) {
inline int hamming_distance(const std::string& s, const std::string_view t) {
if (s.length() != t.length()){
return -1;
}
Expand All @@ -71,11 +72,11 @@ inline int hamming_distance(const std::string &s, const std::string &t) {
}

std::tuple<size_t, size_t, int> highest_scoring_segment(
const std::string& query, const std::string& ref, int match, int mismatch, int end_bonus
const std::string& query, const std::string_view ref, int match, int mismatch, int end_bonus
);

AlignmentInfo hamming_align(
const std::string &query, const std::string &ref, int match, int mismatch, int end_bonus
const std::string& query, const std::string_view ref, int match, int mismatch, int end_bonus
);

#endif
4 changes: 2 additions & 2 deletions src/aln.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ inline Alignment extend_seed(
bool consistent_nam
) {
const std::string query = nam.is_rc ? read.rc : read.seq;
const std::string& ref = references.sequences[nam.ref_id];
const std::string_view ref = references.sequences[nam.ref_id];

const auto projected_ref_start = nam.projected_ref_start();
const auto projected_ref_end = std::min(nam.ref_end + query.size() - nam.query_end, ref.size());
Expand All @@ -222,7 +222,7 @@ inline Alignment extend_seed(
int result_ref_start;
bool gapped = true;
if (projected_ref_end - projected_ref_start == query.size() && consistent_nam) {
std::string ref_segm_ham = ref.substr(projected_ref_start, query.size());
std::string_view ref_segm_ham = ref.substr(projected_ref_start, query.size());
auto hamming_dist = hamming_distance(query, ref_segm_ham);

if (hamming_dist >= 0 && (((float) hamming_dist / query.size()) < 0.05) ) { //Hamming distance worked fine, no need to ksw align
Expand Down