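Packages/BioArchLinux/prodigal/seqc.patch
2023-03-05 00:26:43 +05:30

203 lines
7.5 KiB
Diff

Summary: unified diff against sequence.c.
- read_seq_training() and next_seq_multi(): the input handle parameter changes from FILE * to fptr, and lines are read with INPUT_GETS() instead of fgets().
- The two AGGAGG-motif scoring routines: match[] is now set to -10.0 for all six entries (the loop starts at 0 instead of at limit) before scoring, and the "if(pos+i < 0) continue;" skip becomes an "if(pos+i >= 0)" guard, so entries with pos+i < 0 keep the -10.0 default rather than being left unset.
- The remaining -/+ pairs show no visible difference in this view; they appear to be whitespace-only changes (the last hunk leaves the file without a trailing newline).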

--- a/sequence.c 2023-03-04 23:37:47.000573653 +0530
+++ b/sequence.c 2023-03-04 23:38:55.239315879 +0530
@@ -24,12 +24,12 @@
Read the sequence for training purposes. If we encounter multiple
sequences, we insert TTAATTAATTAA between each one to force stops in all
six frames. When we hit MAX_SEQ bp, we stop and return what we've got so
- far for training. This routine reads in FASTA, and has a very 'loose'
- Genbank and Embl parser, but, to be safe, FASTA should generally be
+ far for training. This routine reads in FASTA, and has a very 'loose'
+ Genbank and Embl parser, but, to be safe, FASTA should generally be
preferred.
*******************************************************************************/
-int read_seq_training(FILE *fp, unsigned char *seq, unsigned char *useq,
+int read_seq_training(fptr fp, unsigned char *seq, unsigned char *useq,
double *gc, int do_mask, mask *mlist, int *nm) {
char line[MAX_LINE+1];
int hdr = 0, fhdr = 0, bctr = 0, len = 0, wrn = 0;
@@ -37,7 +37,7 @@
unsigned int i, gapsize = 0;
line[MAX_LINE] = '\0';
- while(fgets(line, MAX_LINE, fp) != NULL) {
+ while(INPUT_GETS(line, MAX_LINE, fp) != NULL) {
if(hdr == 0 && line[strlen(line)-1] != '\n' && wrn == 0) {
wrn = 1;
fprintf(stderr, "\n\nWarning: saw non-sequence line longer than ");
@@ -58,7 +58,7 @@
else if(hdr == 1 && (line[0] == '/' && line[1] == '/')) hdr = 0;
else if(hdr == 1) {
if(strstr(line, "Expand") != NULL && strstr(line, "gap") != NULL) {
- sscanf(strstr(line, "gap")+4, "%u", &gapsize);
+ sscanf(strstr(line, "gap")+4, "%u", &gapsize);
if(gapsize < 1 || gapsize > MAX_LINE) {
fprintf(stderr, "Error: gap size in gbk file can't exceed line");
fprintf(stderr, " size.\n");
@@ -73,7 +73,7 @@
if(len - mask_beg >= MASK_SIZE) {
if(*nm == MAX_MASKS) {
fprintf(stderr, "Error: saw too many regions of 'N''s in the ");
- fprintf(stderr, "sequence.\n");
+ fprintf(stderr, "sequence.\n");
exit(52);
}
mlist[*nm].begin = mask_beg;
@@ -93,8 +93,8 @@
set(seq, bctr+1);
gc_cont++;
}
- else if(line[i] != 'a' && line[i] != 'A') {
- set(seq, bctr+1);
+ else if(line[i] != 'a' && line[i] != 'A') {
+ set(seq, bctr+1);
set(useq, len);
}
bctr+=2; len++;
@@ -119,7 +119,7 @@
/* This routine reads in the next sequence in a FASTA/GB/EMBL file */
-int next_seq_multi(FILE *fp, unsigned char *seq, unsigned char *useq,
+int next_seq_multi(fptr fp, unsigned char *seq, unsigned char *useq,
int *sctr, double *gc, int do_mask, mask *mlist, int *nm,
char *cur_hdr, char *new_hdr) {
char line[MAX_LINE+1];
@@ -131,7 +131,7 @@
if(*sctr > 0) reading_seq = 1;
line[MAX_LINE] = '\0';
- while(fgets(line, MAX_LINE, fp) != NULL) {
+ while(INPUT_GETS(line, MAX_LINE, fp) != NULL) {
if(reading_seq == 0 && line[strlen(line)-1] != '\n' && wrn == 0) {
wrn = 1;
fprintf(stderr, "\n\nWarning: saw non-sequence line longer than ");
@@ -169,7 +169,7 @@
}
else if(reading_seq == 1) {
if(strstr(line, "Expand") != NULL && strstr(line, "gap") != NULL) {
- sscanf(strstr(line, "gap")+4, "%u", &gapsize);
+ sscanf(strstr(line, "gap")+4, "%u", &gapsize);
if(gapsize < 1 || gapsize > MAX_LINE) {
fprintf(stderr, "Error: gap size in gbk file can't exceed line");
fprintf(stderr, " size.\n");
@@ -184,7 +184,7 @@
if(len - mask_beg >= MASK_SIZE) {
if(*nm == MAX_MASKS) {
fprintf(stderr, "Error: saw too many regions of 'N''s in the ");
- fprintf(stderr, "sequence.\n");
+ fprintf(stderr, "sequence.\n");
exit(55);
}
mlist[*nm].begin = mask_beg;
@@ -204,8 +204,8 @@
set(seq, bctr+1);
gc_cont++;
}
- else if(line[i] != 'a' && line[i] != 'A') {
- set(seq, bctr+1);
+ else if(line[i] != 'a' && line[i] != 'A') {
+ set(seq, bctr+1);
set(useq, len);
}
bctr+=2; len++;
@@ -240,14 +240,14 @@
/* Takes rseq and fills it up with the rev complement of seq */
-void rcom_seq(unsigned char *seq, unsigned char *rseq, unsigned char *useq,
+void rcom_seq(unsigned char *seq, unsigned char *rseq, unsigned char *useq,
int len) {
int i, slen=len*2;
for(i = 0; i < slen; i++)
if(test(seq, i) == 0) set(rseq, slen-i-1+(i%2==0?-1:1));
for(i = 0; i < len; i++) {
if(test(useq, i) == 1) {
- toggle(rseq, slen-1-i*2);
+ toggle(rseq, slen-1-i*2);
toggle(rseq, slen-2-i*2);
}
}
@@ -564,7 +564,7 @@
}
/*******************************************************************************
- Creates a GC frame plot for a given sequence. This is simply a string with
+ Creates a GC frame plot for a given sequence. This is simply a string with
the highest GC content frame for a window centered on position for every
position in the sequence.
*******************************************************************************/
@@ -654,7 +654,7 @@
}
/*******************************************************************************
- Finds the highest-scoring region similar to AGGAGG in a given stretch of
+ Finds the highest-scoring region similar to AGGAGG in a given stretch of
sequence upstream of a start.
*******************************************************************************/
@@ -663,14 +663,14 @@
double match[6], cur_ctr, dis_flag;
limit = imin(6, start-4-pos);
- for(i = limit; i < 6; i++) match[i] = -10.0;
+ for(i = 0; i < 6; i++) match[i] = -10.0;
/* Compare the 6-base region to AGGAGG */
for(i = 0; i < limit; i++) {
- if(pos+i < 0) continue;
- if(i%3 == 0 && is_a(seq, pos+i) == 1) match[i] = 2.0;
- else if(i%3 != 0 && is_g(seq, pos+i) == 1) match[i] = 3.0;
- else match[i] = -10.0;
+ if(pos + i >= 0) {
+ if(i%3 == 0 && is_a(seq, pos+i) == 1) match[i] = 2.0;
+ else if(i%3 != 0 && is_g(seq, pos+i) == 1) match[i] = 3.0;
+ }
}
/* Find the maximally scoring motif */
@@ -730,7 +730,7 @@
}
/*******************************************************************************
- Finds the highest-scoring region similar to AGGAGG in a given stretch of
+ Finds the highest-scoring region similar to AGGAGG in a given stretch of
sequence upstream of a start. Only considers 5/6-mers with 1 mismatch.
*******************************************************************************/
@@ -739,18 +739,19 @@
double match[6], cur_ctr, dis_flag;
limit = imin(6, start-4-pos);
- for(i = limit; i < 6; i++) match[i] = -10.0;
+ for(i = 0; i < 6; i++) match[i] = -10.0;
/* Compare the 6-base region to AGGAGG */
for(i = 0; i < limit; i++) {
- if(pos+i < 0) continue;
- if(i % 3 == 0) {
- if(is_a(seq, pos+i) == 1) match[i] = 2.0;
- else match[i] = -3.0;
- }
- else {
- if(is_g(seq, pos+i) == 1) match[i] = 3.0;
- else match[i] = -2.0;
+ if(pos+i >= 0) {
+ if(i % 3 == 0) {
+ if(is_a(seq, pos+i) == 1) match[i] = 2.0;
+ else match[i] = -3.0;
+ }
+ else {
+ if(is_g(seq, pos+i) == 1) match[i] = 3.0;
+ else match[i] = -2.0;
+ }
}
}
@@ -801,4 +802,4 @@
int imin(int x, int y) {
if(x < y) return x;
return y;
-}
+}
\ No newline at end of file