воскресенье, 15 января 2017 г.

Open Reading Frames (ROSALIND ORF)

Given: A DNA string s of length at most 1 kbp in FASTA format.

Return: Every distinct candidate protein string that can be translated from ORFs of s. Strings can be returned in any order.

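I use the codon_table() function here. It is not defined in this post; it must return a table that maps each DNA codon (e.g. 'ATG') to its one-letter amino acid, because the code below looks up codons taken directly from the DNA string.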

def find_substr(find_what, find_where):
    """Return every index at which ``find_what`` occurs in ``find_where``.

    Overlapping occurrences are all reported, in ascending order.

    Args:
        find_what: The substring to look for.
        find_where: The string to search in.

    Returns:
        A list of 0-based start indices. The list is empty when there is
        no match or when ``find_what`` is longer than ``find_where``.
    """
    last_start = len(find_where) - len(find_what)
    # startswith() with an offset compares in place, so no slice is built
    # for each candidate position.
    return [
        i
        for i in range(last_start + 1)
        if find_where.startswith(find_what, i)
    ]

def protein_maker(line):
    """Translate every open reading frame found on one DNA strand.

    A reading frame starts at each occurrence of the start codon 'ATG' and
    is read three bases at a time until the first stop codon ('TAG', 'TGA'
    or 'TAA') in that frame. The stop codon itself is not translated.
    A start codon with no stop codon after it in its frame yields nothing.

    Codons are translated through the module-level ``c_table`` mapping,
    which must be defined before this function is called.

    Args:
        line: The DNA strand as a string.

    Returns:
        A list of protein strings, one per start codon that reaches a stop
        codon, ordered by start position. Duplicates are not removed.

    Raises:
        KeyError: If a codon inside a reading frame is missing from
            ``c_table``.
    """
    stop_codons = ('TAG', 'TGA', 'TAA')
    proteins = []
    for start in find_substr('ATG', line):
        # Start from an empty translation for every start codon. Otherwise
        # a frame that runs off the end of the strand without a stop codon
        # would leak its partial translation into the next protein.
        residues = []
        for k in range(start, len(line) - 2, 3):
            codon = line[k:k + 3]
            if codon in stop_codons:
                proteins.append(''.join(residues))
                break
            residues.append(c_table[codon])
    return proteins

# Read the FASTA file: the first line is the header, every following line
# holds a piece of the DNA sequence.
with open('orf.txt', 'r') as f:
    next(f, None)  # skip the header line (no error on an empty file)
    line = ''.join(l.strip() for l in f)

# NOTE: codon_table() is not defined in this listing and has to be supplied
# separately. protein_maker() indexes the result with codons sliced from
# the DNA string, so it must be keyed by DNA codons.
c_table = codon_table()

# Consider the reverse complement as well: reverse the strand, then swap
# each base for its complement. Characters other than A/C/G/T are dropped.
complement = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
reverse_line = line[::-1]
reverse_compliment_line = ''.join(
    complement[base] for base in reverse_line if base in complement
)

prts = protein_maker(line)
rev_prts = protein_maker(reverse_compliment_line)
# The union of both strands keeps each distinct protein exactly once.
proteins = list(set(prts) | set(rev_prts))
for i in proteins:
    print(i)

Комментариев нет:

Отправить комментарий