from src.util import util
from src.basics import lexical_analyzer as lex
import queue
import operator
[docs]class Species:
"""
Species (DNA Complexes) consists of one or more strands
"""
id = -1
'''species id'''
colormap = {}
'''map colors to nodes'''
colorset = {}
'''set of colors'''
nodes = []
'''strand ids in the species'''
canonicalform = ''
'''canonical form'''
strands = []
'''strands'''
parsingseq = []
''' parsing sequence for deriving canonical form'''
def __init__(self, nodes, colorset, colormap, strandgraph):
self.nodes = nodes
self.colormap = colormap
self.colorset = colorset
self.derive_canonical_form(strandgraph)
self.construct_strands()
[docs] def set_id(self, id):
self.id = id
[docs] @staticmethod
def derive_rootmap(nodes):
rootmap = {}
for i in nodes:
rootmap[i] = i
return rootmap
[docs] def get_starting_vertex(self, strandgraph):
"""
find the starting vertex to do derive canonical function
:param strandgraph: a StrandGraph object
:return: starting vertex id(strand id)
"""
minlen = 100000
minc = -1
for i in self.colorset:
curlen = len(self.colormap[i])
if curlen < minlen:
minlen = curlen
minc = i
elif curlen == minlen:
if minc > i:
minc = i
'''
minlen = 100000
minv = -1
for v in self.colormap[minc]:
curlen = len(strandgraph.bondgraph.adj[v])
if curlen < minlen:
minlen = curlen
minv = v
'''
if len(self.colormap[minc]) == 1:
return list(self.colormap[minc])[0]
startingvertex = self.prune(list(self.colormap[minc]), strandgraph)
'''
startingvertex = self.prune_starting_vertices(
self.derive_rootmap(self.colormap[minc]),
set(),
self.colormap[minc],
strandgraph.bondgraph,
set(strandgraph.V))
'''
return startingvertex
[docs] def prune(self, nodes, strandgraph):
"""
Pruning algorithm for selecting starting vertex
:param nodes: candidate nodes
:param strandgraph:
:return:
"""
q = [queue.Queue() for _ in range(len(nodes))]
visited = [[False for i in range(len(strandgraph.V))] for j in range(len(nodes))]
labelling = ['' for _ in range(len(nodes))]
bondnum = [0 for _ in range(len(nodes))]
pendingbond = [[] for _ in range(len(nodes))]
for i in range(len(nodes)):
q[i].put(nodes[i])
while len(nodes) != 1 and not q[0].empty():
minv = 0
labelling = ['' for _ in range(len(nodes))]
delete = []
kept = [0]
for i in range(len(nodes)):
curv = q[i].get()
visited[i][curv] = True
labelling[i], q[i], bondnum[i], pendingbond[i] = self.traverse(strandgraph,
curv,
q[i],
bondnum[i],
pendingbond[i],
'',
visited[i])
if i == minv:
continue
else:
if labelling[i] < labelling[minv]:
delete = kept
kept = [i]
minv = i
elif labelling[i] == labelling[minv]:
kept.append(i)
continue
else:
delete.append(i)
delete.sort(reverse=True)
for i in delete:
nodes.pop(i)
q.pop(i)
bondnum.pop(i)
pendingbond.pop(i)
visited.pop(i)
return nodes[0]
[docs] @staticmethod
def refresh_for_pruning(v, d, n2, c2, d2):
"""
deprecated
:param v:
:param d:
:param n2:
:param c2:
:param d2:
:return:
"""
return [v], [d], [n2], [c2], [d2]
[docs] def prune_starting_vertices(self, rootv, prevnodes, nodes, bondgraph, notvisited):
"""
deprecated
prune starting vertices (strands) when there are more than one instances of one strand type
:param rootv: rootmap for nodes (map current nodes to their root nodes)
:param prevnodes: previous layer of nodes
:param nodes: current layer of nodes
:param bondgraph: a BondGraph object
:param notvisited: list of not visited nodes
:return: starting vertex
"""
notvisited = notvisited - prevnodes
if len(nodes) == 1 or len(notvisited) == 0:
return rootv[nodes[0]]
minc = 10000
curv = []
nextc = []
nextv = []
curd = []
nextd = []
for v in nodes:
bondlen = 0
info = []
bonds = bondgraph.merge_bonds_ignoring_nodes(v, prevnodes)
# TODO: if len(bond)=0
for i in bonds:
bondlen += len(i.dom)
for j in range(0, len(i.dom)):
info.append((i.dom[j], i.node2, bondgraph.color[i.node2], i.dom2[j]))
info.sort(key=operator.itemgetter(0))
d, n2, c2, d2 = list(zip(*info))
if bondlen < minc:
minc = bondlen
curv, curd, nextv, nextc, nextd = self.refresh_for_pruning(v, d, n2, c2, d2)
elif bondlen == minc:
res1 = util.compare(d, curd[0])
res2 = util.compare(c2, nextc[0])
res3 = util.compare(d2, nextd[0])
if res1 == -1:
curv, curd, nextv, nextc, nextd = self.refresh_for_pruning(v, d, n2, c2, d2)
elif res1 == 0:
if res2 == -1:
curv, curd, nextv, nextc, nextd = self.refresh_for_pruning(v, d, n2, c2, d2)
elif res2 == 0:
if res3 == -1:
curv, curd, nextv, nextc, nextd = self.refresh_for_pruning(v, d, n2, c2, d2)
elif res3 == 0:
curv.append(v)
curd.append(d)
nextc.append(c2)
nextv.append(n2)
nextd.append(d2)
rootmap = {}
for i in range(0, len(nextv)):
rootmap[nextv[i]] = rootv[curv[i]]
if not isinstance(nodes, set):
tmpset = set()
for i in nodes:
tmpset = tmpset | set(i)
nodes = tmpset
return self.prune_starting_vertices(rootmap, nodes, nextv, bondgraph, notvisited)
[docs] @staticmethod
def check_pendingbond(b, pending):
"""
check if b is in pending bonds
:param b: domain
:param pending: pending bonds
:return: the bond number if b is in pending bond, -1 otherwise
"""
for i in range(0, len(pending)):
if pending[i][0] == b:
return i
return -1
[docs] def traverse(self, strandgraph, curv, q, bondnum, pendingbond, canonical, visited):
"""
traverse the strand and produce a canonical form
:param strandgraph: a StrandGraph object
:param curv: current vertex in strandgraph
:param q: queue for determining if all nodes in species has been encountered
:param bondnum: number of bonds
:param pendingbond: list of bonds (in domain level) that are pending to add
:param canonical: canonical form
:param visited: boolean list for visiting vertices in species
:return: canonical form for one strand
"""
string = '<'
bondgraph = strandgraph.bondgraph
E = []
flag = False
domlen = len(strandgraph.strands[curv].domains)
for i in bondgraph.adj[curv]:
E += i.transform_E()
E.sort(key=util.sort_e_by_domain)
cursor = 0
for i in range(0, domlen):
domain = strandgraph.strands[curv].domains[i]
string += domain.name
if domain.toehold:
string += '^'
if domain.comp:
string += '*'
if flag:
if i != domlen - 1:
string += ' '
continue
if len(E) > 0:
pos = self.check_pendingbond((curv, i), pendingbond)
if pos != -1:
string += '!' + str(pendingbond[pos][1])
pendingbond.pop(pos)
E.pop(cursor)
if cursor >= len(E):
flag = True
elif not flag and (curv, i) == E[cursor][0]:
bondnum += 1
string += '!' + str(bondnum)
pendingbond.append((E[cursor][1], bondnum))
if not visited[E[cursor][1][0]] and not E[cursor][1][0] in q.queue:
q.put(E[cursor][1][0])
cursor += 1
if cursor >= len(E):
flag = True
if i != domlen - 1:
string += ' '
string += '>'
canonical += string
return canonical, q, bondnum, pendingbond
[docs] def construct_strands(self):
"""
reverse construction of strands: canonical form -> strands
"""
recolormap = {}
for key, vals in self.colormap.items():
for val in vals:
recolormap[val] = key
strands = []
l = self.canonicalform.split('|')
for i in range(0, len(l)):
strand = lex.lexer_strand(l[i], 0)
strand.add_color(recolormap[self.parsingseq[i]])
strands.append(strand)
self.strands = strands
[docs] def generate_output(self):
"""
supporting function for txt output
:return: string of canonical form
"""
string = ''
string += str(self.id) + '\n'
l = self.canonicalform.split('|')
for i in range(0, len(l)):
string += l[i] + '\n'
return string
'''
def generate_sites(self):
sites = []
l = self.canonicalform.split('|')
for i in range(0, len(l)):
sites += lex.lexer_site(l[i])
return sites
'''