Another interesting task!
What I do: suppose such a superstring S exists. Let the coordinate of the first read on S be 0. In MainFunc I find all reads that extend the first read to the right (its right tails) and calculate their coordinates on S. Using recursion, I find everything that follows the first read on the right.
Then I reverse all the remaining reads and do the same. Finally I reverse the result back and get the "left tail".
Given: At most 50 DNA strings whose length does not exceed 1 kbp in FASTA format (which represent reads deriving from the same strand of a single linear chromosome).
The dataset is guaranteed to satisfy the following condition: there exists a unique way to reconstruct the entire chromosome from these reads by gluing together pairs of reads that overlap by more than half their length.
Return: A shortest superstring containing all the given strings (thus corresponding to a reconstructed chromosome).
def find_middle(len1):
    """Return the smallest integer strictly greater than half of ``len1``.

    For a read of length ``len1`` this is the minimum number of characters
    an overlap must have to cover "more than half" of the read.

    The even case (``len1 / 2 + 1``) and the odd case (``(len1 + 1) / 2``)
    both reduce to ``len1 // 2 + 1``.  Floor division is used so the result
    is an ``int`` on Python 2 and Python 3 alike and stays usable as a
    slice index.
    """
    return len1 // 2 + 1
def find_max_middle(len1, len2):
    """Return the larger of ``find_middle(len1)`` and ``find_middle(len2)``.

    Given the lengths of two reads, this is the overlap length that is
    more than half of *both* reads.
    """
    return max(find_middle(len1), find_middle(len2))
def MainFunc(find_what_reads, find_where_reads, find_what_positions, res_string):
    """Extend ``res_string`` to the right with reads that overlap the seeds.

    Parameters:
        find_what_reads: seed reads whose right-hand neighbours are sought.
        find_where_reads: pool of candidate reads.  It is modified in
            place: every read that gets matched is replaced by ``''``.
        find_what_positions: start coordinate in ``res_string`` of each
            seed, parallel to ``find_what_reads``.
        res_string: the superstring assembled so far.

    Returns:
        A dict with two keys: ``'res_string'`` (the extended superstring)
        and ``'find_where_reads'`` (the candidate pool with matched reads
        blanked out).

    The matched reads become the seeds of a recursive call, so the whole
    chain to the right of the original seeds is followed.
    """
    next_reads = []
    next_positions = []
    for seed, seed_pos in zip(find_what_reads, find_what_positions):
        seed_len = len(seed)
        for index, candidate in enumerate(find_where_reads):
            # Minimum overlap: strictly more than half of the longer read
            # (the same value find_max_middle computes).  Floor division
            # keeps it an int on Python 2 and 3.
            min_overlap = max(seed_len, len(candidate)) // 2 + 1
            tail_start = seed_len - min_overlap
            if tail_start < 0:
                # The seed is shorter than the required overlap; a negative
                # slice start would silently wrap around and match garbage.
                continue
            # str.find returns -1 on a miss; 0 is a valid hit (the overlap
            # has exactly the minimum length), so it must be accepted.
            offset = candidate.find(seed[tail_start:])
            if offset < 0:
                continue
            candidate_pos = seed_pos + tail_start - offset
            # Number of characters by which the candidate sticks out past
            # the current right end of res_string.
            overhang = len(candidate) - len(res_string[candidate_pos:])
            if overhang > 0:
                res_string = res_string + candidate[-overhang:]
            # NOTE(review): the original indentation was lost; it is
            # assumed that every matched read is consumed and re-seeded,
            # not only those that lengthen res_string - confirm.
            find_where_reads[index] = ''
            next_reads.append(candidate)
            next_positions.append(candidate_pos)
    if next_reads:
        # One recursive call yields both results.
        deeper = MainFunc(next_reads, find_where_reads, next_positions, res_string)
        res_string = deeper['res_string']
        find_where_reads = deeper['find_where_reads']
    return {'res_string': res_string, 'find_where_reads': find_where_reads}
def main(path='16.txt'):
    """Read FASTA reads from ``path`` and print the assembled superstring.

    Parameters:
        path: FASTA file to read; defaults to ``'16.txt'``, the name the
            script originally had hard-coded.

    The first read is the anchor.  ``MainFunc`` first grows it to the
    right; the reads it leaves over are then reversed and the same
    procedure is run on the reversed anchor, which yields the left tail
    once reversed back.
    """
    reads = []
    current = ''
    # The context manager closes the file even if parsing fails.
    with open(path, 'r') as fasta:
        for line in fasta:
            if line[0] != '>':
                current = current + line
            else:
                # A header line ends the previous record, if there was one.
                if current != '':
                    reads.append(current.replace('\n', ''))
                current = ''
    # The last record has no header after it to flush it.
    reads.append(current.replace('\n', ''))

    # Keep the anchor now: MainFunc blanks consumed entries of ``reads``.
    anchor = reads[0]
    rightwards = MainFunc([anchor], reads, [0], anchor)
    string_left2right = rightwards['res_string']

    leftover_reversed = [r[::-1] for r in rightwards['find_where_reads']]
    leftwards = MainFunc([anchor[::-1]], leftover_reversed, [0], anchor[::-1])
    # Drop the reversed anchor itself; what remains is the left tail.
    left_tail = leftwards['res_string'][len(anchor):][::-1]

    # Parenthesised form works on both Python 2 and Python 3.
    print(left_tail + string_left2right)


if __name__ == '__main__':
    main()
What I do: suppose such a superstring S exists. Let the coordinate of the first read on S be 0. In MainFunc I find all reads that extend the first read to the right (its right tails) and calculate their coordinates on S. Using recursion, I find everything that follows the first read on the right.
Then I reverse all the remaining reads and do the same. Finally I reverse the result back and get the "left tail".
Given: At most 50 DNA strings whose length does not exceed 1 kbp in FASTA format (which represent reads deriving from the same strand of a single linear chromosome).
The dataset is guaranteed to satisfy the following condition: there exists a unique way to reconstruct the entire chromosome from these reads by gluing together pairs of reads that overlap by more than half their length.
Return: A shortest superstring containing all the given strings (thus corresponding to a reconstructed chromosome).
def find_middle(len1):
    """Return the smallest integer strictly greater than half of ``len1``.

    For a read of length ``len1`` this is the minimum number of characters
    an overlap must have to cover "more than half" of the read.

    The even case (``len1 / 2 + 1``) and the odd case (``(len1 + 1) / 2``)
    both reduce to ``len1 // 2 + 1``.  Floor division is used so the result
    is an ``int`` on Python 2 and Python 3 alike and stays usable as a
    slice index.
    """
    return len1 // 2 + 1
def find_max_middle(len1, len2):
    """Return the larger of ``find_middle(len1)`` and ``find_middle(len2)``.

    Given the lengths of two reads, this is the overlap length that is
    more than half of *both* reads.
    """
    return max(find_middle(len1), find_middle(len2))
def MainFunc(find_what_reads, find_where_reads, find_what_positions, res_string):
    """Extend ``res_string`` to the right with reads that overlap the seeds.

    Parameters:
        find_what_reads: seed reads whose right-hand neighbours are sought.
        find_where_reads: pool of candidate reads.  It is modified in
            place: every read that gets matched is replaced by ``''``.
        find_what_positions: start coordinate in ``res_string`` of each
            seed, parallel to ``find_what_reads``.
        res_string: the superstring assembled so far.

    Returns:
        A dict with two keys: ``'res_string'`` (the extended superstring)
        and ``'find_where_reads'`` (the candidate pool with matched reads
        blanked out).

    The matched reads become the seeds of a recursive call, so the whole
    chain to the right of the original seeds is followed.
    """
    next_reads = []
    next_positions = []
    for seed, seed_pos in zip(find_what_reads, find_what_positions):
        seed_len = len(seed)
        for index, candidate in enumerate(find_where_reads):
            # Minimum overlap: strictly more than half of the longer read
            # (the same value find_max_middle computes).  Floor division
            # keeps it an int on Python 2 and 3.
            min_overlap = max(seed_len, len(candidate)) // 2 + 1
            tail_start = seed_len - min_overlap
            if tail_start < 0:
                # The seed is shorter than the required overlap; a negative
                # slice start would silently wrap around and match garbage.
                continue
            # str.find returns -1 on a miss; 0 is a valid hit (the overlap
            # has exactly the minimum length), so it must be accepted.
            offset = candidate.find(seed[tail_start:])
            if offset < 0:
                continue
            candidate_pos = seed_pos + tail_start - offset
            # Number of characters by which the candidate sticks out past
            # the current right end of res_string.
            overhang = len(candidate) - len(res_string[candidate_pos:])
            if overhang > 0:
                res_string = res_string + candidate[-overhang:]
            # NOTE(review): the original indentation was lost; it is
            # assumed that every matched read is consumed and re-seeded,
            # not only those that lengthen res_string - confirm.
            find_where_reads[index] = ''
            next_reads.append(candidate)
            next_positions.append(candidate_pos)
    if next_reads:
        # One recursive call yields both results.
        deeper = MainFunc(next_reads, find_where_reads, next_positions, res_string)
        res_string = deeper['res_string']
        find_where_reads = deeper['find_where_reads']
    return {'res_string': res_string, 'find_where_reads': find_where_reads}
def main(path='16.txt'):
    """Read FASTA reads from ``path`` and print the assembled superstring.

    Parameters:
        path: FASTA file to read; defaults to ``'16.txt'``, the name the
            script originally had hard-coded.

    The first read is the anchor.  ``MainFunc`` first grows it to the
    right; the reads it leaves over are then reversed and the same
    procedure is run on the reversed anchor, which yields the left tail
    once reversed back.
    """
    reads = []
    current = ''
    # The context manager closes the file even if parsing fails.
    with open(path, 'r') as fasta:
        for line in fasta:
            if line[0] != '>':
                current = current + line
            else:
                # A header line ends the previous record, if there was one.
                if current != '':
                    reads.append(current.replace('\n', ''))
                current = ''
    # The last record has no header after it to flush it.
    reads.append(current.replace('\n', ''))

    # Keep the anchor now: MainFunc blanks consumed entries of ``reads``.
    anchor = reads[0]
    rightwards = MainFunc([anchor], reads, [0], anchor)
    string_left2right = rightwards['res_string']

    leftover_reversed = [r[::-1] for r in rightwards['find_where_reads']]
    leftwards = MainFunc([anchor[::-1]], leftover_reversed, [0], anchor[::-1])
    # Drop the reversed anchor itself; what remains is the left tail.
    left_tail = leftwards['res_string'][len(anchor):][::-1]

    # Parenthesised form works on both Python 2 and Python 3.
    print(left_tail + string_left2right)


if __name__ == '__main__':
    main()
Комментариев нет:
Отправить комментарий