Thursday, March 19, 2015

bpy9. Counting k-mer words in Python

The function count, below, will find the number of times a pattern appears.


The function slides a window the same length as pattern along text, compares each window with pattern, and returns the total number of matches (overlapping matches are counted separately).

# bpy9.py

from __future__ import print_function, division
import numpy as np

def rand_dna(n):
    """Return a random DNA string of length n.

    Each character is drawn with replacement from 'A', 'C', 'T', 'G' using
    NumPy's global random state, so calling np.random.seed beforehand makes
    the result reproducible.
    """
    nucleotides = ['A', 'C', 'T', 'G']
    bases = np.random.choice(nucleotides, size=n)
    # The sampled array holds one-character strings; concatenate them.
    return "".join(bases)

def count(text, pattern):
    """Return how many times pattern occurs in text, overlaps included.

    Every window of len(pattern) consecutive items of text is compared with
    pattern, so count("AAAA", "AA") is 3.

    Args:
        text: the sequence to search (e.g. a DNA string).
        pattern: the k-mer to look for.

    Returns:
        The number of start positions at which text matches pattern.
        Returns 0 when pattern is longer than text. An empty pattern matches
        at every index including the end of text, as str.count does.
    """
    k = len(pattern)
    # Valid window starts are 0 .. len(text) - k inclusive, hence the + 1;
    # without it a match ending at the last character is never examined.
    # The range is empty when pattern is longer than text.
    n_windows = len(text) - k + 1
    return sum(1 for i in range(n_windows) if text[i:i + k] == pattern)

# Fix the seed so the random text and pattern are the same on every run.
np.random.seed(34)

# Random 50-base text to search in.
dna = rand_dna(50)
print("dna = ", dna)

# Random 3-mer to look for.
dna_pattern = rand_dna(3)
print("dna_pattern = ", dna_pattern)

# Number of times the 3-mer occurs in the text.
c = count(dna, dna_pattern)
print("c =", c)

# dna =  CTTCTCAGCGTTCTCCACTCTACATCAGGCCGAAGTCAATTCATCTTTGT
# dna_pattern =  CTT
# c = 2

No comments:

Post a Comment