30 lines
1.1 KiB
Python
30 lines
1.1 KiB
Python
from collections import defaultdict
|
|
import re
|
|
|
|
data = defaultdict(lambda : 0)
|
|
hap_list = set()
|
|
|
|
with open("16S.arp", "r") as f:
|
|
sample_array = f.read().split("}\n")
|
|
for sample in sample_array:
|
|
sample_data = sample.split("\n")
|
|
if len(sample_data[0].split('"'))>1:
|
|
# Sample name
|
|
sample_name = sample_data[0].split('"')[1]
|
|
data[sample_name] = defaultdict(lambda : 0);
|
|
data[sample_name]["sample_size"] = sample_data[1].split('=')[1].strip()
|
|
for hap in sample_data[3:]:
|
|
if hap:
|
|
data[sample_name][hap.split()[0].strip()] = hap.split()[1].strip()
|
|
hap_list.add(hap.split()[0].strip())
|
|
|
|
with open("output.csv", "w+") as f:
|
|
hap_list = sorted(list(hap_list), key=lambda x : int(x.split('_')[1]))
|
|
key_list = sorted(data.keys())
|
|
f.write("sample_name, " + ", ".join(key_list) + "\n")
|
|
f.write("sample_size, " + ", ".join([data[key]["sample_size"] for key in key_list]) + "\n")
|
|
for hap in hap_list:
|
|
f.write(hap)
|
|
for key in key_list:
|
|
f.write(", " + str(data[key][hap]))
|
|
f.write("\n")
|