2023-04-12 03:39:54 +08:00
|
|
|
#include <malloc.h>
|
|
|
|
#include <stdio.h>
|
2023-04-12 03:41:11 +08:00
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
2023-04-12 03:39:54 +08:00
|
|
|
/* Integer truth values for code that predates <stdbool.h>. */
#define TRUE 1
#define FALSE 0

/*
 * Larger / smaller of two values.
 * These are plain textual macros: each argument can be evaluated twice,
 * so do not pass expressions with side effects (e.g. MAX(i++, j)).
 */
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
2023-04-12 03:39:54 +08:00
|
|
|
|
2023-04-12 03:41:11 +08:00
|
|
|
/*
 * One sequence record as filled in by ReadFlat() and printed by WriteData().
 * Field order is part of the layout and must not be changed.
 */
struct data_format {
    int length;    /* number of characters stored in nuc */
    char *nuc;     /* heap buffer with the sequence characters; ReadFlat
                      stores a '\0' at nuc[length] */
    int offset;    /* integer read from "(n)" on the header line; WriteData
                      omits the "(n)" suffix when this is 0 */
    char name[64]; /* first whitespace-delimited token following the type
                      character on the header line */
    char type;     /* first character of the header line */
};
|
|
|
|
|
2023-04-12 03:41:11 +08:00
|
|
|
char *Realloc(char *block, int size);
|
|
|
|
char *Calloc(int count, int size);
|
|
|
|
int ErrorOut(int code, char *string);
|
|
|
|
int Errorout(char *string);
|
|
|
|
int ReadFlat(FILE *file, struct data_format align[], int maxseqs);
|
|
|
|
int WriteData(FILE *file, struct data_format data[], int count);
|
2023-04-12 03:39:54 +08:00
|
|
|
|
2023-04-12 03:41:11 +08:00
|
|
|
/*
 * ReadFlat - read sequence records from a flat-format text file.
 *
 * A line whose first character is one of '>', '#', '%', '"' or '@' starts a
 * new record.  That character becomes the record's type, the following
 * whitespace-delimited token becomes its name (truncated to fit name[64]),
 * and an optional "(n)" suffix supplies the integer offset (0 when absent or
 * unparsable).  Every other line is appended, minus its line ending, to the
 * current record's nuc buffer.  Lines that appear before the first header
 * have no record to belong to and are ignored.
 *
 * file     open input stream; a NULL stream is reported via Errorout().
 * align    caller-provided array that receives the records; each record's
 *          nuc buffer is allocated here and is owned by the caller afterwards.
 * maxseqs  number of elements available in align[].
 *
 * Returns the number of records read.  Terminates the program through
 * Errorout() when the stream is NULL, when the file holds more than maxseqs
 * records, or when it holds no record at all.
 *
 * Limitation: input is read in chunks of sizeof(cinline) - 1 characters, so
 * a header line longer than that is split and its tail is treated as a
 * separate line.
 */
int ReadFlat(FILE *file, struct data_format align[], int maxseqs)
{
    int len = 0;            /* characters stored so far in the current record */
    int count = -1;         /* index of the current record; -1 before any header */
    int offset;
    unsigned maxlen = 1024; /* usable capacity of the current nuc buffer; the
                               buffer always holds maxlen + 1 bytes so that
                               nuc[len] is writable even when len == maxlen */
    size_t linelen;
    char *paren;
    char cinline[1025];

    if (file == NULL) {
        Errorout("Cannot open data file");
    }

    while (fgets(cinline, sizeof cinline, file) != NULL) {
        /*
         * Strip the line ending only if one is really there: the last line
         * of a file, or a chunk of an over-long line, may not end in '\n'.
         */
        linelen = strlen(cinline);
        if (linelen > 0 && cinline[linelen - 1] == '\n') {
            cinline[--linelen] = '\0';
        }
        if (linelen > 0 && cinline[linelen - 1] == '\r') {
            cinline[--linelen] = '\0';
        }

        switch (cinline[0]) {
        case '>':
        case '#':
        case '%':
        case '"':
        case '@':
            /* Header line: pick up "(offset)" and cut the line there. */
            offset = 0;
            paren = strchr(cinline, '(');
            if (paren != NULL) {
                if (sscanf(paren + 1, "%d", &offset) != 1) {
                    offset = 0;
                }
                *paren = '\0';
            }

            /* Close the previous record before opening the next one. */
            if (count != -1) {
                align[count].length = len;
                align[count].nuc[len] = '\0';
                /* Size the next buffer like the record just finished. */
                maxlen = (unsigned)len;
            }

            count++;
            /* Valid indices are 0 .. maxseqs - 1. */
            if (count >= maxseqs) {
                Errorout("Sorry, alignment is too large");
            }

            align[count].nuc = Calloc((int)(maxlen + 1), sizeof(char));
            if (align[count].nuc == NULL) {
                Errorout("Calloc problem");
            }
            align[count].type = cinline[0];
            align[count].offset = offset;

            /* Leave an empty name when the header carries no token. */
            align[count].name[0] = '\0';
            sscanf(cinline + 1, "%63s", align[count].name);

            len = 0;
            break;

        default:
            /* Nothing to append to until a header has been seen. */
            if (count == -1) {
                break;
            }
            if (len + linelen > maxlen) {
                maxlen = (maxlen + (unsigned)linelen) * 2;
                align[count].nuc =
                    Realloc(align[count].nuc, (int)(maxlen + 1));
            }
            memcpy(align[count].nuc + len, cinline, linelen);
            len += (int)linelen;
            break;
        }
    }

    if (count == -1) {
        Errorout("No sequences found in data file");
    }

    /* Close the final record. */
    align[count].length = len;
    align[count].nuc[len] = '\0';
    return count + 1;
}
|
|
|
|
|
2023-04-12 03:41:11 +08:00
|
|
|
/*
 * Errorout - report a fatal error and terminate.
 *
 * Writes `string` followed by a newline to stderr, then exits with status 1.
 * The int return type exists only for the prototype; control never returns
 * to the caller.
 */
int Errorout(char *string)
{
    (void)fprintf(stderr, "%s\n", string);

    exit(1);
}
|
|
|
|
|
2023-04-12 03:41:11 +08:00
|
|
|
int WriteData(FILE *file, struct data_format data[], int count)
|
2023-04-12 03:39:54 +08:00
|
|
|
{
|
2023-04-12 03:41:11 +08:00
|
|
|
int i, j;
|
|
|
|
for (j = 0; j < count; j++) {
|
|
|
|
if (data[j].offset)
|
|
|
|
fprintf(file, "\n%c%s(%d)", data[j].type, data[j].name,
|
|
|
|
data[j].offset);
|
2023-04-12 03:39:54 +08:00
|
|
|
else
|
2023-04-12 03:41:11 +08:00
|
|
|
fprintf(file, "\n%c%s", data[j].type, data[j].name);
|
2023-04-12 03:39:54 +08:00
|
|
|
|
2023-04-12 03:41:11 +08:00
|
|
|
for (i = 0; i < data[j].length; i++) {
|
|
|
|
if (i % 60 == 0) fputc('\n', file);
|
|
|
|
fputc(data[j].nuc[i], file);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
2023-04-12 03:39:54 +08:00
|
|
|
|
2023-04-12 03:41:11 +08:00
|
|
|
/*
 * ErrorOut - conditional fatal error.
 *
 * A `code` of 0 signals failure: "Error:" followed by `string` and a newline
 * is written to stderr and the program exits with status 1.  For any other
 * `code` nothing is printed, `string` is not touched, and 0 is returned.
 */
int ErrorOut(int code, char *string)
{
    /* Non-zero means success: nothing to report. */
    if (code != 0) {
        return 0;
    }

    fprintf(stderr, "Error:%s\n", string);
    exit(1);
}
|
|
|
|
|
2023-04-12 03:41:11 +08:00
|
|
|
/*
 * Calloc - calloc() wrapper that never returns NULL.
 *
 * count  number of elements; must not be negative.
 * size   size of one element in bytes; must not be negative.
 *
 * Returns a zero-filled block of count * size bytes.  A zero-sized request
 * yields a valid one-byte block, because calloc() is allowed to return NULL
 * for it and that must not be mistaken for an out-of-memory condition.
 * The caller owns the block and releases it with free().
 *
 * On a negative argument or allocation failure a message is written to
 * stderr and the program exits with status -1.
 */
char *Calloc(int count, int size)
{
    char *temp;

    /* A negative int would turn into a huge size_t inside calloc(). */
    if (count < 0 || size < 0) {
        fprintf(stderr, "Error in Calloc\n");
        exit(-1);
    }

    if (count == 0 || size == 0) {
        count = 1;
        size = 1;
    }

    temp = calloc((size_t)count, (size_t)size);
    if (temp == NULL) {
        fprintf(stderr, "Error in Calloc\n");
        exit(-1);
    }

    return temp;
}
|
|
|
|
|
2023-04-12 03:41:11 +08:00
|
|
|
/*
 * Realloc - realloc() wrapper that never returns NULL.
 *
 * block  block previously obtained from Calloc()/Realloc(), or NULL to
 *        allocate a new one.
 * size   requested size in bytes; must not be negative.
 *
 * Returns the resized block, which may have moved; the old pointer must not
 * be used afterwards.  A size of 0 is rounded up to one byte, because
 * realloc(block, 0) has implementation-defined behaviour and may free the
 * block and return NULL.
 *
 * On a negative size or allocation failure a message is written to stderr
 * and the program exits with status -1.
 */
char *Realloc(char *block, int size)
{
    char *temp;

    /* A negative int would turn into a huge size_t inside realloc(). */
    if (size < 0) {
        fprintf(stderr, "Error in Realloc\n");
        exit(-1);
    }

    if (size == 0) {
        size = 1;
    }

    temp = realloc(block, (size_t)size);
    if (temp == NULL) {
        fprintf(stderr, "Error in Realloc\n");
        exit(-1);
    }

    return temp;
}
|
|
|
|
|