# NOTE(review): this file reached review with its line structure collapsed
# (the whole program sat on a handful of physical lines). The line breaks and
# indentation below are reconstructed from the statement syntax; the code
# tokens and runtime strings are kept exactly as they appeared.
#
# NOTE(review): several spans of code are MISSING from the text that was
# reviewed. Wherever the original apparently had '<' immediately followed by
# a letter, everything up to the next '>' is gone. Each affected statement is
# marked "DAMAGED" below and is left exactly as found; those statements are
# not valid Python and must be restored from the upstream GenoFrag 2.1
# release before this file can run. Nothing has been guessed or filled in.
#
# NOTE(review): string literals are copied exactly as found; runs of blanks
# inside them may have been collapsed by the same damage -- TODO confirm
# against the upstream release.
#
# NOTE(review): the code is Python 2 only as written (string.split, the '<>'
# operator, the 'Tkinter' module name).

import string
import sys   # NOTE(review): 'sys' is not referenced anywhere in the visible code


# ===================================================================================
# readPrimers (filename)
# ===================================================================================
#
# Read the primers from the file 'filename'.
#
# -----------------------------------------------------------------------------------
# INPUT
# -----
# file format:
#
#   > int int [start/end] primer_text
#   ATTAGGACCAGGATTGGAGACCCAG
#   > int int [start/end] primer_text
#   GGATAGAGCACGAGATAGAGGACCA
#   ...
#
# the first integer is the index where the primer starts in the genome
# the second is the index where the primer ends in the genome
#
# sidx, eidx : inclusive bounds; only primers whose index k satisfies
#              sidx <= k <= eidx are kept
#
# OUTPUT
# ------
# Two lists are built, 'start' and 'stop', of the form
#
#   [[idx0,'ATTAGGACCAGGATTGGAGACCCAG'] [idx1,'GGATAGAGCACGAGATAGAGGACCA'] ... ]
#
# They hold the start primers (start) and the end primers (stop).
# Each element is a pair [idx, primer] where idx is the position of the primer
# in the genome: the first integer of the header for a 'start' primer, the
# second integer for any other primer.
#
# -----------------------------------------------------------------------------------
def readPrimers (filename,sidx,eidx):
    f=open(filename,'r')
    raw_list=f.readlines()
    f.close()
    start = []
    stop = []
    # Records are two lines each: header line (even index), sequence line (odd index).
    for i in range(0,len(raw_list),2):
        # Whitespace-split header: l[0] is '>', l[1]/l[2] the two integers,
        # l[3] the kind. A header with fewer than four fields raises IndexError.
        l = string.split(raw_list[i])
        if l[3]=='start':
            k = int(l[1])
        else:
            # Anything that is not literally 'start' is treated as a stop primer.
            k = int(l[2])
        x = [k]
        # NOTE(review): nesting reconstructed -- the appends are placed inside
        # the range test because genofrag() reads element [1] of every entry
        # and reports "no ... primers found in [startidx,endidx]" on an empty
        # list. TODO confirm against the upstream release.
        if k >= sidx and k <= eidx:
            # First whitespace-separated token of the sequence line.
            x.append(string.split(raw_list[i+1])[0])
            if l[3]=='start':
                start.append(x)
            else:
                stop.append(x)
    return start, stop


# inclusion tests
#
# Both helpers scan lf from index 1 (element 0 of a fragment record is its
# four-field header, see the structure description inside genofrag) and
# compare fields [1] and [2] of f with those of the entries of lf.

# Returns 1 as soon as the (partly lost) test succeeds for some entry of lf,
# otherwise 0.
def fragNotInclude (f,lf):
    i=1
    # DAMAGED -- NOTE(review): the loop bound, the binding of 'tf' and the
    # beginning of an 'if' condition were lost between "while i" and
    # "=tf[2]". Only the tail of the condition survives. The deeper indent of
    # 'return 1' reflects the presumed lost 'if'.
    while i=tf[2]:
            return 1
        i=i+1
    return 0

# Returns the index in lf of the first entry for which the (partly lost)
# test succeeds, otherwise -1.
def fragInclude (f,lf):
    i=1
    # DAMAGED -- NOTE(review): same loss as in fragNotInclude; only the tail
    # "=f[2]" of the condition survives.
    while i=f[2]:
            return i
        i=i+1
    return -1


# graph connection
# --------------------
#
# Builds the record for a new fragment [frag_start, frag_stop] and links it
# to the previously built fragments in list_frag that it overlaps.
#
# frag_start, frag_stop       : genome positions of the new fragment
# frag_idxstart, frag_idxstop : stored as fields 2 and 3 of the record header
# list_frag                   : fragment records built so far
# MAX_LENGTH                  : used to stop the backward scan early
# MIN_OVERLAP, MAX_OVERLAP    : bounds on the overlap with an earlier fragment
#
# Returns 'frag': [[frag_start,frag_stop,frag_idxstart,frag_idxstop], link, ...]
# where each link is a three-element list [i, ., .] whose first field is the
# index of the earlier fragment in list_frag. A result of length 1 means no
# link was added.
def connect_frag(frag_start,frag_stop,frag_idxstart,frag_idxstop,list_frag,MAX_LENGTH,MIN_OVERLAP,MAX_OVERLAP):
    frag = [[frag_start,frag_stop,frag_idxstart,frag_idxstop]]
    frag_len = frag_stop-frag_start
    # Walk the earlier fragments from the most recently added one backwards.
    i = len(list_frag)
    while i>0:
        i=i-1;
        list_start = list_frag[i][0][0]
        list_stop = list_frag[i][0][1]
        list_len = list_stop - list_start   # not used in the surviving code
        # The scan stops at the first earlier fragment that starts more than
        # MAX_LENGTH before the new one.
        if list_start+MAX_LENGTH < frag_start:
            return frag
        # Overlap (list_stop - frag_start) must exceed MIN_OVERLAP and be at
        # most MAX_OVERLAP.
        if list_stop-MIN_OVERLAP > frag_start:
            if list_stop-frag_start<=MAX_OVERLAP:
                k=1
                # DAMAGED -- NOTE(review): the loop bound, the binding of 'l'
                # and the beginning of an 'if' condition were lost between
                # "while k" and "l[2]:". The indentation of the statements
                # below (down to the final 'return') is presumed.
                while kl[2]:
                        fr = [i,l[1],frag_len]
                    else:
                        fr = [i,l[1],l[2]]
                    k=k+1
                    if fragNotInclude(fr,frag)==0:
                        frag.append(fr)
                        # Drop every entry that fragInclude reports for fr.
                        j = fragInclude(fr,frag)
                        while j != -1:
                            frag.pop(j)
                            j = fragInclude(fr,frag)
    return frag


# Main computation: reads the primers and produces a textual report.
#
# minle, maxle     : fragment length bounds (reported as "fragment length")
# minov, maxov     : overlap bounds (reported as "overlap")
# startz, endz     : widths of the start and end zones
# startidx, endidx : primer index range passed to readPrimers
# infile           : primer file name
#
# Returns (display, Error): 'display' is the report text and 'Error' is a
# list whose first element is a status code. Error == [2] is returned when no
# start primer or no stop primer lies in [startidx, endidx]. The code that
# initialises 'Error' on the other paths is in a lost span; the caller
# (App.run) treats 0 as success and 1 as "no solution, with diagnostic data".
def genofrag (minle, maxle, minov, maxov, startz, endz, startidx, endidx, infile):
    list_start, list_stop = readPrimers(infile,startidx,endidx)
    MIN_LENGTH = minle
    MAX_LENGTH = maxle
    MIN_OVERLAP = minov
    MAX_OVERLAP = maxov

    # Report banner and parameter summary.
    display = ""
    display = display + '=================================================================================================\n'
    display = display + '\n'
    display = display + ' GenoFrag - version 2.1 (2009)\n'
    display = display + ' D. Lavenier\n'
    display = display + ' copyright IRISA / ENS Cachan / INRA\n'
    display = display + '\n'
    display = display + 'parameters\n'
    display = display + '-------------------------------------------------------------------------------------------------'
    display = display + '\n'
    display = display + ' fragment length = [' + str(MIN_LENGTH) + ',' + str(MAX_LENGTH) + ']\n'
    display = display + ' overlap = [' + str(MIN_OVERLAP) + ',' + str(MAX_OVERLAP) + ']\n'
    display = display + '\n'

    # The start zone extends 'startz' past the first start primer.
    if len(list_start)>0 :
        START_ZONE = startz + list_start[0][0]
    else:
        Error = [2]
        display = display + 'no start primers found in [' + str(startidx) + ',' + str(endidx) + ']\n'
        return display, Error
    # The end zone begins 'endz' before the last stop primer.
    if len(list_stop) > 0 :
        END_ZONE = list_stop[len(list_stop)-1][0] - endz
    else :
        Error = [2]
        display = display + 'no stop primers found in [' + str(startidx) + ',' + str(endidx) + ']\n'
        return display, Error
    display = display + ' zone start = [' + str(list_start[0][0]) + ',' + str(START_ZONE) + ']\n'
    display = display + ' end = [' + str(END_ZONE) + ',' + str(list_stop[len(list_stop)-1][0]) + ']\n'
    display = display + '\n\n'

    # construction of a list of fragments
    # -------------------------------------
    #
    # a fragment has the following structure:
    #
    # [[idx_start, idx_stop, idx_list_start, idx_list_stop][idx_list_frag,min_len,max_len] ... ]
    #
    # the first element has four fields
    #   idx_start      = index in the genome where the fragment starts
    #   idx_stop       = index in the genome where the fragment ends
    #   idx_list_start = index in the list of start primers
    #   idx_list_stop  = index in the list of end primers
    # the following elements have three fields
    #   idx_list_frag  = index, in the list of fragments, of an overlapping fragment
    #   min_len        = minimum size of the optimal interval on this path
    #   max_len        = maximum size of the optimal interval on this path

    # initialisation of the list of fragments
    list_frag = []

    # position in 'list_stop' immediately after the primer (in list_start)
    # that has a greater index
    next_idxstop = 0
    # DAMAGED -- NOTE(review): a large span was lost between
    # "list_stop[next_idxstop][0]" and "= MIN_LENGTH :". It presumably held
    # the rest of this 'while', the loop(s) that define istart, istop,
    # idxstart and idxstop, and the beginning of a length test. The nesting
    # of the statements below relative to those lost loop headers is unknown;
    # the indentation shown is a placeholder.
    while list_stop[next_idxstop][0]= MIN_LENGTH :
        frag = connect_frag (istart,istop,idxstart,idxstop,list_frag,MAX_LENGTH,MIN_OVERLAP,MAX_OVERLAP)
        # test whether connections exist: a length of 'frag' < 2
        # means that no connection exists
        if len(frag)>1:
            list_frag.append(frag)
            # print len(list_frag)-1, frag
        else:
            if istart<=START_ZONE:
                # add an initialisation connection (first fragment)
                frag.append([-1,istop-istart,istop-istart])
                list_frag.append(frag)
                # print len(list_frag)-1, frag
        idxstop = idxstop+1
        if idxstop >= len(list_stop): break
        istop = list_stop[idxstop][0]

    # test whether the whole genome has been covered
    if list_frag[len(list_frag)-1][0][1] < END_ZONE:
        # IF NOT: take the last fragment as the solution
        # and take the smallest interval in the last
        # connected fragment
        mingap = MAX_LENGTH - MIN_LENGTH
        i = len(list_frag) - 1
        lfr = list_frag[i]
        j=1
        # DAMAGED -- NOTE(review): text lost between "while j" and
        # "= END_ZONE:". It presumably held the rest of this scan, the 'else'
        # branch of the coverage test and the initialisation of 'Error',
        # 'solution', 'numfrag', 'nextnumfrag' and 'inextnumfrag', none of
        # which is assigned in the surviving text. Indentation below is a
        # placeholder.
        while j= END_ZONE:
            lfr = list_frag[i]
            # print i,lfr
            j=1
            # DAMAGED -- NOTE(review): text lost between "while j" and
            # "= START_ZONE:" (presumably the link scan and the header of the
            # back-tracking loop).
            while j= START_ZONE:
                minlen = lfr[inextnumfrag][1]
                maxlen = lfr[inextnumfrag][2]
                # One solution row: [start position, stop position,
                # start primer sequence, stop primer sequence].
                x = [list_frag[numfrag][0][0]]
                x.append(list_frag[numfrag][0][1])
                x.append(list_start[list_frag[numfrag][0][2]][1])
                x.append(list_stop[list_frag[numfrag][0][3]][1])
                solution.append(x)
                # Follow the selected link to the previous fragment.
                numfrag=nextnumfrag
                lfr = list_frag[numfrag]
                j=1
                # DAMAGED -- NOTE(review): text lost between "while j" and
                # "=minlen and lfr[j][2]<=maxlen:" (loop bound and the start
                # of the link-selection condition).
                while j=minlen and lfr[j][2]<=maxlen:
                        nextnumfrag = lfr[j][0]
                        inextnumfrag = j
                    j=j+1
            minlen = lfr[inextnumfrag][1]
            maxlen = lfr[inextnumfrag][2]

    # Row for the fragment the back-tracking ended on.
    x = [list_frag[numfrag][0][0]]
    x.append(list_frag[numfrag][0][1])
    x.append(list_start[list_frag[numfrag][0][2]][1])
    x.append(list_stop[list_frag[numfrag][0][3]][1])
    solution.append(x)

    # Extreme values of solution[i][1]-solution[i-1][0] over consecutive rows
    # (presumably the overlaps; their use is in a lost span).
    over_max = 0
    over_min = 1000000
    for i in range(1,len(solution)):
        k = solution[i][1]-solution[i-1][0]
        if over_max < k : over_max=k
        if over_min > k : over_min=k

    # Status 1: append diagnostic data for the caller.
    if Error[0]==1:
        # [start, stop, rank] of the first two solution rows, or [-1,-1,-1]
        # when the row does not exist.
        if len(solution) >= 1:
            Error.append([solution[0][0],solution[0][1],len(solution)])
        else :
            Error.append([-1,-1,-1])
        if len(solution) >= 2:
            Error.append([solution[1][0],solution[1][1],len(solution)-1])
        else :
            Error.append([-1,-1,-1])
        # Positions of the start primers lying inside the window Error[1]
        # (Error[1] itself is set in a lost span).
        l=[]
        for i in range (len(list_start)):
            if list_start[i][0] > Error[1][0] and list_start[i][0] < Error[1][1]-1000:
                l.append(list_start[i][0])
        Error.append(l)
        # Same for the stop primers.
        l=[]
        for i in range (len(list_stop)):
            # DAMAGED -- NOTE(review): text lost between "list_stop[i][0]"
            # and "=0:". It presumably held the end of this test, the append
            # of 'l' to 'Error', the table header of the report, the
            # initialisation of 'i' and 'ok', and the header of the output
            # loop whose body follows. Indentation below is a placeholder.
            if list_stop[i][0] > Error[1][0] and list_stop[i][0]=0:
                # One report row per solution entry, walking 'solution' with a
                # decreasing index i.
                display = display + str(repr(len(solution)-i).rjust(5)) + ' '
                display = display + str(repr(solution[i][0]).rjust(9)) + ' '
                display = display + str(repr(solution[i][1]).rjust(9)) + ' '
                display = display + str(repr(solution[i][1]-solution[i][0]).rjust(6)) + ' '
                if ok==0:
                    # First row printed: no overlap value is written.
                    display = display + ' '
                    ok=1
                else:
                    display = display + str(repr(solution[i+1][1]-solution[i][0]).rjust(8)) + ' '
                display = display + str(solution[i][2].rjust(27))
                display = display + str(solution[i][3].rjust(27)) + '\n '
                i=i-1
    return display, Error


from Tkinter import *


# Tkinter front end: parameter spinners, file entries, RUN/CLEAR/SAVE
# buttons, a small canvas for the status/diagnostic drawing and a text area
# for the report produced by genofrag().
class App:

    # Builds all widgets inside 'master' and loads the default parameters.
    def __init__(self, master):
        # Default parameter values restored by reset().
        self.lenmin_init = 9000
        self.lenmax_init = 11000
        self.ovmin_init = 500
        self.ovmax_init = 1500
        self.start_init = 5000
        self.end_init = 5000

        # Tk variables shown by the value labels; the integer twins
        # (self.*_int) are created in reset().
        self.lenmin = StringVar()
        self.lenmax = StringVar()
        self.ovmin = StringVar()
        self.ovmax = StringVar()
        self.start = StringVar()
        self.end = StringVar()

        # Blank labels are used throughout as spacers.
        Label(master, text=" ").grid(row=0,column=0)
        Label(master, text=" ").grid(row=0,column=2)

        # --- parameter panel ------------------------------------------------
        frame0 = Frame(master)
        frame0.grid(row=0, column=1, sticky=W)

        Label(frame0, text="GenoFrag\n", font=("Helvetica", 20, "bold")).grid(row=0,column=5, columnspan=4)

        Label(frame0, text="______ Fragment Length ______\n",).grid(row=1,column=0, columnspan=4)
        Label(frame0, text="__________ Overlap __________\n",).grid(row=1,column=5, columnspan=4)
        Label(frame0, text="______ Start/End Zones ______\n",).grid(row=1,column=10, columnspan=4)

        # fragment length: min
        Label(frame0, text="min", justify=LEFT).grid(row=2, column=0, sticky=W)
        Label(frame0, textvariable=self.lenmin, width=10).grid(row=2, column=1, sticky=E)
        Button(frame0, text="+", fg="red", width=1, height=1, command=self.add100lenmin).grid(row=2, column=2)
        Button(frame0, text="-", fg="blue", width=1, height=1, command=self.sub100lenmin).grid(row=2, column=3)
        Label(frame0, text=" ").grid(row=2, column=4)

        # fragment length: max
        Label(frame0, text="max", justify=LEFT).grid(row=3, column=0, sticky=W)
        Label(frame0, textvariable=self.lenmax, width=10).grid(row=3, column=1, sticky=E)
        Button(frame0, text="+", fg="red", width=1, height=1, command=self.add100lenmax).grid(row=3, column=2)
        Button(frame0, text="-", fg="blue", width=1, height=1, command=self.sub100lenmax).grid(row=3, column=3)
        Label(frame0, text=" ").grid(row=3, column=4)

        # overlap: min
        Label(frame0, text="min", justify=LEFT).grid(row=2, column=5, sticky=W)
        Label(frame0, textvariable=self.ovmin, width=10).grid(row=2, column=6, sticky=E)
        Button(frame0, text="+", fg="red", width=1, height=1, command=self.add10ovmin).grid(row=2, column=7)
        Button(frame0, text="-", fg="blue", width=1, height=1, command=self.sub10ovmin).grid(row=2, column=8)
        Label(frame0, text=" ").grid(row=2, column=9)

        # overlap: max
        Label(frame0, text="max", justify=LEFT).grid(row=3, column=5, sticky=W)
        Label(frame0, textvariable=self.ovmax, width=10).grid(row=3, column=6, sticky=E)
        Button(frame0, text="+", fg="red", width=1, height=1, command=self.add10ovmax).grid(row=3, column=7)
        Button(frame0, text="-", fg="blue", width=1, height=1, command=self.sub10ovmax).grid(row=3, column=8)
        Label(frame0, text=" ").grid(row=3, column=9)

        # start zone
        Label(frame0, text="start", justify=LEFT).grid(row=2, column=10, sticky=W)
        Label(frame0, textvariable=self.start, width=10).grid(row=2, column=11, sticky=E)
        Button(frame0, text="+", fg="red", width=1, height=1, command=self.add1000start).grid(row=2, column=12)
        Button(frame0, text="-", fg="blue", width=1, height=1, command=self.sub1000start).grid(row=2, column=13)

        # end zone
        Label(frame0, text="end", justify=LEFT).grid(row=3, column=10, sticky=W)
        Label(frame0, textvariable=self.end, width=10).grid(row=3, column=11, sticky=E)
        Button(frame0, text="+", fg="red", width=1, height=1, command=self.add1000end).grid(row=3, column=12)
        Button(frame0, text="-", fg="blue", width=1, height=1, command=self.sub1000end).grid(row=3, column=13)

        Label(master, text=" ").grid(row=1,column=0)

        # --- input / output file names --------------------------------------
        frame5 = Frame(master)
        frame5.grid(row=2, column=1, sticky=W)
        Label(frame5, text=" ").grid(row=4,column=0)
        Label(frame5, text="Primer file ").grid(row=5,column=0, columnspan=2, sticky=W)
        self.FilePrimer = Entry(frame5, width=80)
        self.FilePrimer.grid(row=5, column=2,columnspan=12)
        Label(frame5, text="Output file ").grid(row=6,column=0, columnspan=2, sticky=W)
        self.OutFile = Entry(frame5, width=80)
        self.OutFile.insert(0,"genofrag.res")
        self.OutFile.grid(row=6, column=2,columnspan=12)

        Label(master, text=" ").grid(row=3,column=0)

        # --- action buttons and covering zone -------------------------------
        frame1 = Frame(master)
        frame1.grid(row=4, column=1, sticky=W)
        Button(frame1, text=" RUN ", bg="#ffaaaa", command=self.run).grid(row=0, column=0)
        Button(frame1, text="CLEAR", bg="#aaffaa", command=self.clear).grid(row=0, column=1)
        Button(frame1, text="SAVE ", bg="#aaaaff", command=self.save).grid(row=0, column=2)
        Label(frame1, text=" Covering zone : from ").grid(row=0,column=3)
        self.MinIndex = Entry(frame1, width=7)
        self.MinIndex.grid(row=0,column=4)
        self.MinIndex.insert(0,"0")
        Label(frame1, text=" to ").grid(row=0,column=5)
        self.MaxIndex = Entry(frame1, width=7)
        self.MaxIndex.grid(row=0,column=6)
        self.MaxIndex.insert(0,"50000")

        Label(master, text=" ").grid(row=5,column=0)

        # --- status / diagnostic canvas -------------------------------------
        self.Visu = Canvas(master, width=730, height=80)
        self.Visu.grid(row=6,column=1)

        Label(master, text=" ").grid(row=7,column=0)

        # --- report text area with vertical scrollbar -----------------------
        frame2 = Frame(master)
        frame2.grid(row=8, column=1, sticky=W)
        scroll2 = Scrollbar(frame2)
        scroll2.pack(side=RIGHT, fill=Y)
        self.OutGF = Text(frame2, width=101, height=30,background="#aaaaaa", font=("courier", 9), yscrollcommand=scroll2.set)
        self.OutGF.pack(side=LEFT, fill=BOTH)
        scroll2.config(command=self.OutGF.yview)

        Label(master, text=" ").grid(row=9,column=0)

        # Last report produced by run(); written to disk by save().
        self.display=""
        self.reset()

    # Clears the canvas and redraws its rectangular border.
    def resetVisu(self):
        self.Visu.delete(ALL)
        self.Visu.create_line(2,5,730,5)
        self.Visu.create_line(2,5,2,75)
        self.Visu.create_line(2,75,730,75)
        self.Visu.create_line(730,75,730,5)

    # RUN button: checks that the primer file can be opened, calls genofrag()
    # with the current parameters, shows the report and draws the status.
    def run(self):
        try:
            f = open(self.FilePrimer.get())
            f.close()
        # NOTE(review): bare 'except' -- any exception, not only a missing
        # file, produces the "cannot find Primer file" message.
        except:
            self.resetVisu()
            self.Visu.create_text(365,40, font=("Helvetica",15), text="cannot find Primer file")
            return
        p1 = self.lenmin_int
        p2 = self.lenmax_int
        p3 = self.ovmin_int
        p4 = self.ovmax_int
        p5 = self.start_int
        p6 = self.end_int
        # int() raises ValueError if an entry does not hold an integer.
        p7 = int(self.MinIndex.get())
        p8 = int(self.MaxIndex.get())
        self.display,error=genofrag(p1,p2,p3,p4,p5,p6,p7,p8,self.FilePrimer.get())
        self.OutGF.delete(1.0,END)
        self.OutGF.insert(END,self.display)
        self.resetVisu()
        # error[0] is the status code: 0 means a solution was found.
        if error[0]<>0:
            self.Visu.create_text(640,15, font=("Helvetica",12,"bold"), fill="red", text="NO SOLUTION FOUND" )
        else:
            self.Visu.create_text(365,40, font=("Helvetica",15), text="Solution found")
        # Status 1 carries diagnostic data: error[1] is the displayed window
        # [lo, hi], error[2] and error[3] are [start, stop, label] fragments
        # (start == -1 when absent), error[4] and error[5] are primer
        # positions drawn in blue and red respectively.
        if error[0]==1:
            # Ruler: one tick every 1000 positions, 20 pixels apart (hence
            # the /50 scale used below); taller, labelled ticks every 10000.
            x = 15
            for i in range(error[1][0],error[1][1],1000):
                if i%10000 == 0:
                    tt = str(i/1000) + 'K'
                    self.Visu.create_line(x,50,x,60)
                    # "Hevetica" (sic) is kept as found.
                    self.Visu.create_text(x,70, font=("Hevetica",8), text=tt)
                else:
                    self.Visu.create_line(x,50,x,55)
                x=x+20
            # Fragment error[2], drawn at y=30 with a blue start cap and a
            # red end cap.
            if error[2][0] <> -1 :
                x1 = (error[2][0]-error[1][0])/50 + 15
                x2 = (error[2][1]-error[1][0])/50 + 15
                self.Visu.create_line(x1,30,x2,30)
                self.Visu.create_text((x1+x2)/2,23, font=("Helvetica",8), text=str(error[2][2]))
                self.Visu.create_line(x1,30,x1+5,30,fill="blue",width=3)
                self.Visu.create_line(x2-5,30,x2,30,fill="red",width=3)
            # Fragment error[3], drawn at y=20; its left end is clipped to
            # the ruler origin and the blue start cap is drawn only when it
            # was not clipped.
            if error[3][0] <> -1 :
                x1 = (error[3][0]-error[1][0])/50 + 15
                x2 = (error[3][1]-error[1][0])/50 + 15
                if x1 < 15 : x1 = 15
                self.Visu.create_line(x1,20,x2,20)
                self.Visu.create_text((x1+x2)/2,13, font=("Helvetica",8), text=str(error[3][2]))
                self.Visu.create_line(x2-5,20,x2,20,fill="red",width=3)
                if x1 > 15:
                    self.Visu.create_line(x1,20,x1+5,20,fill="blue",width=3)
            # Primer track: green band with one vertical mark per primer.
            self.Visu.create_rectangle(15,35,715,45, fill="#ccffcc", outline="")
            for i in range(len(error[4])):
                x = (error[4][i]-error[1][0])/50 + 15
                self.Visu.create_line(x,35,x,45, fill="blue")
            for i in range(len(error[5])):
                x = (error[5][i]-error[1][0])/50 + 15
                self.Visu.create_line(x,35,x,45, fill="red")

    # Restores every parameter to its default, empties the report area and
    # redraws the empty canvas.
    def reset(self):
        self.lenmin_int = self.lenmin_init
        self.lenmin.set(str(self.lenmin_int))
        self.lenmax_int = self.lenmax_init
        self.lenmax.set(str(self.lenmax_int))
        self.ovmin_int = self.ovmin_init
        self.ovmin.set(str(self.ovmin_int))
        self.ovmax_int = self.ovmax_init
        self.ovmax.set(str(self.ovmax_int))
        self.start_int = self.start_init
        self.start.set(str(self.start_int))
        self.end_int = self.end_init
        self.end.set(str(self.end_int))
        self.OutGF.delete(1.0,END)
        self.resetVisu()

    # CLEAR button: empties the report area and the canvas; parameters and
    # self.display are left untouched.
    def clear(self):
        self.OutGF.delete(1.0,END)
        self.resetVisu()

    # SAVE button: writes the last report to the file named in the
    # "Output file" entry, overwriting it.
    def save(self):
        f = open(self.OutFile.get(),'w')
        f.write(self.display)
        f.close()

    # --- parameter spinners ---------------------------------------------------
    # Each handler changes the integer value, clamps it, then refreshes the
    # matching StringVar. NOTE(review): in the collapsed text the final
    # '.set(...)' call could syntactically belong to the 'if'; it is placed
    # after it here so the label is refreshed on every click -- TODO confirm.

    # Minimum fragment length +100, capped at the current maximum.
    def add100lenmin(self):
        self.lenmin_int += 100
        if self.lenmin_int > self.lenmax_int:
            self.lenmin_int=self.lenmax_int
        self.lenmin.set(str(self.lenmin_int))

    # Minimum fragment length -100, floored at 0.
    def sub100lenmin(self):
        self.lenmin_int -= 100
        if self.lenmin_int < 0:
            self.lenmin_int=0
        self.lenmin.set(str(self.lenmin_int))

    # Maximum fragment length +100, capped at 50000.
    def add100lenmax(self):
        self.lenmax_int += 100
        if self.lenmax_int > 50000:
            self.lenmax_int=50000
        self.lenmax.set(str(self.lenmax_int))

    # Maximum fragment length -100, floored at the current minimum.
    def sub100lenmax(self):
        self.lenmax_int -= 100
        if self.lenmax_int < self.lenmin_int:
            self.lenmax_int=self.lenmin_int
        self.lenmax.set(str(self.lenmax_int))

    # Minimum overlap +50 (the step is 50 despite the "10" in the name),
    # capped at the current maximum overlap.
    def add10ovmin(self):
        self.ovmin_int += 50
        if self.ovmin_int > self.ovmax_int:
            self.ovmin_int=self.ovmax_int
        self.ovmin.set(str(self.ovmin_int))

    # Minimum overlap -50, floored at 0.
    def sub10ovmin(self):
        self.ovmin_int -= 50
        if self.ovmin_int < 0:
            self.ovmin_int=0
        self.ovmin.set(str(self.ovmin_int))

    # Maximum overlap +50, capped at 50000.
    def add10ovmax(self):
        self.ovmax_int += 50
        if self.ovmax_int > 50000:
            self.ovmax_int=50000
        self.ovmax.set(str(self.ovmax_int))

    # Maximum overlap -50, floored at the current minimum overlap.
    def sub10ovmax(self):
        self.ovmax_int -= 50
        if self.ovmax_int < self.ovmin_int:
            self.ovmax_int=self.ovmin_int
        self.ovmax.set(str(self.ovmax_int))

    # Start zone +1000 (no upper bound).
    def add1000start(self):
        self.start_int += 1000
        self.start.set(str(self.start_int))

    # Start zone -1000, floored at 0.
    def sub1000start(self):
        self.start_int -= 1000
        if self.start_int < 0:
            self.start_int=0
        self.start.set(str(self.start_int))

    # End zone +1000 (no upper bound).
    def add1000end(self):
        self.end_int += 1000
        self.end.set(str(self.end_int))

    # End zone -1000, floored at 0.
    def sub1000end(self):
        self.end_int -= 1000
        if self.end_int < 0:
            self.end_int=0
        self.end.set(str(self.end_int))


# Script entry: runs at import time (there is no __main__ guard).
root = Tk()
app = App(root)
root.title('GenoFrag')
root.mainloop()