понедельник, 3 апреля 2017 г.

Певзнер. Задачи.

 2.1) Разделиться список на два (большие и маленькие значения) сравнивая исходный по парам ([n/2]). Находим мин в списке с маленькими значениями и Макс в списке с большими.

2.2)

m = [8, 4, 6]

res = ''for i0 in range(m[0]+1):
    res += str(i0)
    for i1 in range(m[1]+1):
        res += str(i1)
        for i2 in range(m[2] + 1):
            res += str(i2)
print(res)

def recursion(iter_num, iter_val, res):
    if iter_num < len(m):
        if iter_val <= m[iter_num]:
            res += str(iter_val)
            res = recursion(iter_num + 1, 0, res)
            return recursion(iter_num, iter_val + 1, res)
        else:
            return res
    else:
        return res

print(recursion(0, 0, ''))
 
 
2.3) да нет нет 

2.17) бактерий на k-ом шаге будет 2^k n - 2^k k. Отсюда видно, что через n шагов все бактерии будут съеден


2.18) как? 99*2 только если сразу попадется правдивей

2.19)  

воскресенье, 26 февраля 2017 г.

Enumerating k-mers Lexicographically (ROSALIND LEXF)

Given: A collection of at most 10 symbols defining an ordered alphabet, and a positive integer n (n≤10).

Return: All strings of length n that can be formed from the alphabet, ordered lexicographically.

s = input()
n = int(input())

alphabet = s.split(' ')

def cycle(res, k):
    if k > 1:
        new_res = []
        for r in range(len(res)):
            for i in alphabet:
                new_res.append(res[r] + i)
        return cycle(new_res, k-1)
    else:
        return res

for i in cycle(alphabet, n):
    print(i)

Enumerating Oriented Gene Orderings (ROSALIND SIGN)

Given: A positive integer n≤6.

Return: The total number of signed permutations of length n, followed by a list of all such permutations (you may list the signed permutations in any order).

import copy
import math


def permutations(my_set, perm):
    if len(my_set) > 0:
        res = []
        for s in my_set:
            new_my_set = copy.deepcopy(my_set)
            new_my_set.remove(s)
            for j in permutations(new_my_set, perm + str(s)):
                res.append(j)
        return res
    else:
        return [perm]


def lenn_sets(my_set, res, k):
    if k > 1:
        new_res = []
        for r in range(len(res)):
            for i in my_set:
                new_res.append(res[r] + i)
        return lenn_sets(my_set, new_res, k-1)
    else:
        return res


n = 6r1 = permutations(list(range(1,n+1)), '')
my_set2 = ['+', '-']
r2 = lenn_sets(my_set2, copy.deepcopy(my_set2), n)
f = open('sign.txt', 'w')
f.write(str(int(math.factorial(n)*math.pow(2, n)))+'\n')
for r_1 in r1:
    for r_2 in r2:
        l = ''        for i in range(n):
            l += r_2[i] + r_1[i] + ' '        f.write(l.replace('+', '')+'\n')
f.close()

четверг, 23 февраля 2017 г.

Locating Restriction Sites (ROSALIND REVP)

Given: A DNA string of length at most 1 kbp in FASTA format.

Return: The position and length of every reverse palindrome in the string having length between 4 and 12.

s = input()

def complementary(a, b):
    if (a == 'A' and b == 'T') or (a == 'T' and b == 'A') or (a == 'G' and b == 'C') or (a == 'C' and b == 'G'):
        return True    else:
        return False

res = []
for i in range(1, 6):
    if complementary(s[i - 1], s[i]):
        j = 1        while j < i:
            if complementary(s[i - 1 - j], s[i + j]):
                res.append([i-j, 2 * (j+1)])
                j += 1            else:
                breakfor i in range(6, len(s)-5):
    if complementary(s[i - 1], s[i]):
        j = 1        while j < 6:
            if complementary(s[i - 1 - j], s[i + j]):
                res.append([i - j, 2 * (j+1)])
                j += 1            else:
                breakfor i in range(len(s)-5, len(s)-1):
    if complementary(s[i - 1], s[i]):
        j = 1        while j < len(s)-i:
            if complementary(s[i - 1 - j], s[i + j]):
                res.append([i - j, 2 * (j+1)])
                j += 1            else:
                breakfor r in res:
    print(''.join(str(r[0])) + ' ' + ''.join(str(r[1])))

среда, 22 февраля 2017 г.

RNA Splicing (ROSALIND SPLC)

Given: A DNA string s (of length at most 1 kbp) and a collection of substrings of s acting as introns. All strings are given in FASTA format.

Return: A protein string resulting from transcribing and translating the exons of s. (Note: Only one solution will exist for the dataset provided.)

import re
import rosalind_lib

f = open('splc.txt', 'r')
strings = re.findall(r'(>Rosalind_[0-9]+)\n(([A-Z]+\n)+)', f.read())
rna = strings.pop(0)[1].replace('\n','').replace('T', 'U')
introns = {}
for s in strings:
    intron = s[1].replace('\n','').replace('T', 'U')
    rna = rna.replace(intron,'')
print(rosalind_lib.rna_to_protein(rna))



понедельник, 20 февраля 2017 г.

Find a substring

t = '010's = '0111010100001'print(s.find(t)) - first position

Common problem: we have string s (usually very long) and string t (usually short). Find positions, where t is a substring of s.
Example: for s = 'TATGCAATGC' and t = 'TGC' positions are: 2, 7.

Trere are several algorithms to find them: wikipedia.
I'll show here two easiest ways:
naive algorithm (find all pos):
def substring_positions(s,t):
    res = []
    for i in range(len(s) - len(t) + 1):
        flag = True        for j in range(len(t)):
            if s[i + j] != t[j]:
                flag = False                break        if flag:
            res.append(str(i))
    return res

knuth-morris-pratt algorithm:
def kmp(s,t, limit = 1):
    # building prefix table pt for s    # it's i-th element shows, how many steps go backward on s    # if there will be a mismatch with t string    pt = [0]
    for m in s[1:]:
        j = pt[-1]
        while j > 0 and m != s[j]:
            j = pt[j-1]
        if m == s[j]:
            j += 1        pt.append(j)
    # walk through the s    res = []
    j = 0 # number or symbols matched with t    for i in range(len(s)):
        while j > 0 and s[i] != t[j]:
            j = pt[j - 1]
        if s[i] == t[j]:
            j += 1        if j == len(t):
            res.append(i - j + 1)
            if len(res) == limit:
                return res
            j = 0    return res
 

среда, 15 февраля 2017 г.

Bioinformatics Contest 2017 (task1)

Everybody knows that a huge number of different chemical reactions happen in cells. A reaction takes as an input a set of chemicals (substrates) and converts them to another set (products). Here we consider all reactions to be one-directional. A substrates for a reaction could be chemicals from the environment or products from other reactions. 
A scientist John Doe was given a cell and a list of chemicals that are present in the environment at the beginning. He already knows what reactions could happen in the cell. You should help him to understand which chemicals could appear in the cell.

Input Format


The first line contains initial chemicals separated by spaces. There is always at least one initial chemical. Each chemical is represented by a random positive integer that fits in 4-byte integer type. Each of the following lines describes a reaction, one per line. Each reaction is presented as two lists of integers separated by '->': the list of chemicals, separated by '+', that is needed to perform a reaction and the list of chemicals, separated by '+', that are produced as a result of the reaction. Each chemical could be present in each reaction maximum 2 times: one time at the left part and the other time at the right part (for example, a catalyst could appear in both parts).
Constraints for the easy version: a total number of chemicals through all reactions does not exceed 103
.
Constraints for the hard version: a total number of chemicals through all reactions does not exceed 105.

import sys


def chem_recorder(data_set):
    while len(data_set) > 0:
        new_data_set = set()
        for add_chem in data_set:
            if add_chem not in chem_in_cell:
                chem_in_cell.add(add_chem)
                if add_chem in chems_lines:
                    counter = 0                    for chem_line in chems_lines[add_chem]:
                        chem_line[0].remove(add_chem)
                        if len(chem_line[0].difference(chem_in_cell)) == 0:
                            new_add = chem_line[1].difference(chem_in_cell)
                            new_data_set = new_data_set.union(new_add)
                            chems_lines[add_chem].pop(counter)
                        counter += 1        data_set = new_data_set.copy()


all_lines = sys.stdin.readlines()
in_cell = all_lines.pop(0)
chem_in_cell = set(in_cell.strip().split(' '))
xxx = set(in_cell.strip().split(' '))
chems_lines = {} # 'chem' => [[{left_line1}, {right_line1}], [{left_line2}, {right_line2}]]for line in all_lines:
    line = line.strip()
    l1 = line.split('->')
    left_line = set(l1[0].split('+'))
    right_line = set(l1[1].split('+'))
    not_in_cell_chem = left_line.difference(chem_in_cell)
    if len(not_in_cell_chem) > 0:
        for nic_chem in not_in_cell_chem:
            if nic_chem in chems_lines:
                flag = 0                for chem_line in chems_lines[nic_chem]:
                    if chem_line[0] == not_in_cell_chem:
                        chem_line[1] = chem_line[1].union(right_line)
                        flag = 1                        break                if flag == 0:
                    chems_lines[nic_chem].append([not_in_cell_chem, right_line])
            else:
                chems_lines[nic_chem] = [[not_in_cell_chem, right_line]]
    else:  # gote all chems        chem_recorder(right_line)
chem_recorder(xxx)
print(' '.join(chem_in_cell))