"""dnasp2popart.py -- turn the sample blocks of an ``.arp`` file into a CSV table.

The input is read as text and cut into chunks at every ``"}\\n"``.  A chunk is
treated as a sample when its first line contains a double-quoted name; for
such a chunk the layout is taken positionally:

    line 0   ...  "sample name"
    line 1   ...  key=value          (value is used as the sample size)
    line 2   ...  (ignored)
    line 3+  ...  <haplotype> <count>

The output has one column per sample (sorted by name), a ``sample_name`` and
a ``sample_size`` header row, and then one row per haplotype.  A haplotype
that does not occur in a sample is written as ``0``.

NOTE(review): judging by the script name the input is presumably a DnaSP
export and the output is meant for PopART -- confirm against real files.

Usage:
    python dnasp2popart.py [INPUT [OUTPUT]]

With no arguments the script reads ``16S.arp`` and writes ``output.csv``.
"""

from collections import defaultdict
import re  # noqa: F401  -- not used by this script; kept so the import list is unchanged
import sys

DEFAULT_INPUT = "16S.arp"
DEFAULT_OUTPUT = "output.csv"


def parse_arp(text):
    """Extract per-sample haplotype counts from the text of an ``.arp`` file.

    Args:
        text: Full contents of the input file.

    Returns:
        A ``(samples, haplotypes)`` pair.  ``samples`` maps each sample name
        to a ``defaultdict`` holding the key ``"sample_size"`` plus one entry
        per haplotype (all values are kept as the strings found in the file;
        missing haplotypes default to the integer ``0``).  ``haplotypes`` is
        the set of every haplotype name seen in any sample.

    Raises:
        IndexError: If a sample chunk has no second line or that line has no
            ``=`` (the sample size cannot be located).
    """
    samples = {}
    haplotypes = set()

    for chunk in text.split("}\n"):
        lines = chunk.split("\n")

        # Only chunks whose first line carries a quoted string are samples.
        quoted = lines[0].split('"')
        if len(quoted) <= 1:
            continue
        sample_name = quoted[1]

        # A repeated sample name replaces the earlier record.
        record = defaultdict(int)
        record["sample_size"] = lines[1].split("=")[1].strip()

        # lines[2] is skipped; everything after it is "<haplotype> <count>".
        for line in lines[3:]:
            fields = line.split()
            if len(fields) < 2:
                # Blank/whitespace-only lines and a bare closing brace (file
                # without a trailing newline) are not data rows.
                continue
            hap_name, hap_count = fields[0], fields[1]
            record[hap_name] = hap_count
            haplotypes.add(hap_name)

        samples[sample_name] = record

    return samples, haplotypes


def _hap_sort_key(name):
    """Order haplotypes by the integer in their second ``_``-separated field.

    Names without such a number sort after all numbered ones, by name, so an
    unexpected label no longer aborts the whole conversion.
    """
    parts = name.split("_")
    try:
        return (0, int(parts[1]), name)
    except (IndexError, ValueError):
        return (1, 0, name)


def format_table(samples, haplotypes):
    """Render the parsed samples as the text of the output CSV.

    Args:
        samples: Mapping produced by :func:`parse_arp`.
        haplotypes: Iterable of haplotype names to emit as rows.

    Returns:
        The complete file contents, each row terminated by ``"\\n"`` and
        fields separated by ``", "``.
    """
    sample_names = sorted(samples)
    rows = [
        "sample_name, " + ", ".join(sample_names),
        "sample_size, "
        + ", ".join(samples[name]["sample_size"] for name in sample_names),
    ]
    for hap in sorted(haplotypes, key=_hap_sort_key):
        cells = "".join(", " + str(samples[name][hap]) for name in sample_names)
        rows.append(hap + cells)
    return "\n".join(rows) + "\n"


def convert(input_path=DEFAULT_INPUT, output_path=DEFAULT_OUTPUT):
    """Read ``input_path``, build the table and write it to ``output_path``.

    The table is fully built before the output file is opened, so a parsing
    failure does not leave a truncated output file behind.
    """
    with open(input_path, "r") as src:
        samples, haplotypes = parse_arp(src.read())
    table = format_table(samples, haplotypes)
    with open(output_path, "w") as dst:
        dst.write(table)


if __name__ == "__main__":
    # Optional positional overrides: INPUT, then OUTPUT.
    convert(*sys.argv[1:3])