#!/usr/bin/env python import re # DEFINE FUNCTIONS def shared(x, y): sh = 0 grX = x.split("|") grY = y.split("|") for i in range(len(grX)): grX[i] = set(grX[i].split(",")) for j in range(len(grY)): grY[j] = set(grY[j].split(",")) for xXx in grX: for yYy in grY: if xXx.issubset(yYy): sh += 1 break return sh def scoreConflict(indA, patA, patterns): vrai = True dividand = 0 pg = patA.split("|") score = 0.0 pats2 = patterns[:] overall_rank = 0.0 conflict_score = 0.0 del pats2[indA] for bb, patB in enumerate(pats2): if re.search("\|", patB): dividand += 1 compB = patB.split("|") scB = 0.0 for group in compB: cont = 1 if vrai: tax = group.split(",") ch1 = tax[0] for gp in pg: if ch1 in gp: for ch2 in tax[1:]: if ch2 not in gp and cont: scB = scB + 1.0 cont = 0 score += (scB/len(compB)) conflict_score = score/dividand return conflict_score def uniqify(seq): seen = {} result = [] for item in seq: if item in seen : continue seen[item] = 1 result.append(item) return result def getPattern(site, unknown): considered = [] pattern = [] for x in range(len(site)): if site[x] not in unknown: if site[x] in considered: pattern[considered.index(site[x])].append(str(x)) else: considered.append(site[x]) pattern.append([str(x)]) patStr = "|".join([",".join(g) for g in pattern]) return patStr def jumblePattern(site, unknown): import random siteJ = "" while site: pos = random.randrange(len(site)) siteJ += site[pos] site = site[:pos] + site[(pos+1):] return getPattern(siteJ, unknown) def DNAdetect(seq): seq = seq.upper() oLen = float(len(seq)) seq_C = "" seq_C = seq.replace("A", "") seq_C = seq_C.replace("C", "") seq_C = seq_C.replace("G", "") seq_C = seq_C.replace("T", "") nLen = float(len(seq_C)) perc = (nLen/oLen)*100 if perc < 20.0: return "DNA" else: seq_C = seq.replace("0", "") seq_C = seq_C.replace("1", "") if len(seq_C) == 0: return "standard" else: return "protein" def histogram(num_list, name_list): upper = float(max(num_list)) pad = len(str(upper)) parts = [] for i in range(1,61): parts.append((upper/60)*i) for m, n in enumerate(num_list): pr = name_list[m] + "|" low = 0.0 if n == 0: pr = pr + " "*61 for p, hi in enumerate(parts): if n > low and n <= hi: pr = pr + "="*(p+1) + (" "*(60 - p)) break low = hi print ("[" + pr + "|" + str(n) + " "*(pad-len(str(n))) + "]") def FastaParse(file_name): file = open(file_name) names = [] seqs = [] for line in file: if ">" in line: names.append(line[1:].strip()) seqs.append("") else: seqs[-1] += line.strip() ret = [names, seqs] return ret def printHelp(): print (""" **************** TIGER Help: **************** TIGER: Tree-Independent Generation of Evolutionary Rates (Developed by Carla Cummins in the lab of James Mc Inerney, NUI Maynooth, Co. Kildare, Ireland) -Options: -in Specify input file. File must be in FastA format and must be aligned prior. Datasets with uneven sequence lengths will return an error. -v Returns current TIGER version. -f Changes output formatting options. -f s: sorts sites depending on their agreement score -f r: displays rank values rather than bin numbers -f s,r: displays sorted ranks (*Be sure to put only a "," NO SPACE!) Default prints bin numbers unsorted. -b Set the number of bins to be used. -b : Sites will be placed into number of bins. is a whole number. Default is 10 -rl A list of the rate at each site may be optionally written to a specified file. -rl : writes list of the rates at each site to file.txt. -ptp Specifies that a PTP test should be run. *Note: this option has a huge effect on running time! -z Number of randomisations to be used for the PTP test. -z : each site will be randomised times. is a whole number. Default is 100 -p Specify p-value which denotes significance in PTP test. -p : site will be denoted as significant if p-value is better than . is a floating point number. Default is 0.05 -pl Write a list of p-values to a specified file. -pl : writes list of p-values for each site to file.txt. -u Specify unknown characters in the alignment. Unknown characters are omitted from site patterns and so are not considered in the analysis. -u ?,-,*: defines ?, - and * as unknown characters. (*Be sure to put only a comma between characters, NO SPACE!!) Default is ? only -Examples: 1. ./TIGER -in ExampleFile.aln -f s,r -v -rl rate_list.txt This will run the software on "ExampleFile.aln", with sorted ranks included in the output. The variability measure for each site will be displayed and a list of the rates at (unsorted) sites will be written to the file "rate_list.txt". 2. ./TIGER -in ExampleFile.aln -ptp -r 1000 -p 0.01 -u ?,* This will run the software on the file "ExampleFile.aln" with a PTP test. Sites will be randomised 1,000 times and pass the test if their p-value is <0.01. All ? and * characters encountered in the alignment will be ommitted from the analysis. """)