Packages/mafft/src/mafft-7.487-with-extensions/core/addfunctions.c
starsareintherose eeb4358381 add mafft
2021-08-10 22:21:03 +08:00

1851 lines
47 KiB
C

#include "mltaln.h"
static void strncpy0( char *s1, char *s2, int n )
{
while( n-- ) *s1++ = *s2++;
*s1 = 0;
}
#if 0
static void strncpy0x( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; }
static void strncpy0b0( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; }
static void strncpy0b1( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; }
static void strncpy0b2( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; }
static void strncpy0n0( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; }
static void strncpy0n1( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; }
static void strncpy0n2( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; }
static void strncpy0a0( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; }
static void strncpy0a1( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; }
static void strncpy0a2( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; }
static void strncpy0o0( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; }
static void strncpy0o1( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; }
static void strncpy0o2( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; }
#endif
static void eqpick( char *aseq, char *seq )
{
for( ; *seq != 0; seq++ )
{
if( *seq != '=' )
*aseq++ = *seq;
}
*aseq = 0;
}
void profilealignment2( int n0, int n2, char **aln0, char **aln2, int alloclen, char alg ) // n1 ha allgap
{
int i, newlen;
double *effarr0, *effarr2;
int *allgap0, *allgap2;
double dumdb;
int alcount0, alcount2;
if( aln0[0][1] == 0 && aln2[0][1] == 0 ) return; // --allowshift no tokiha...
// reporterr( "profilealignment!\n" );
commongappick( n0, aln0 );
commongappick( n2, aln2 );
effarr0 = AllocateDoubleVec( n0 );
effarr2 = AllocateDoubleVec( n2 );
allgap0 = AllocateIntVec( n0 );
allgap2 = AllocateIntVec( n2 );
#if 1 // new weight 2015/Jun
alcount0 = 0;
for( i=0; i<n0; i++ )
{
if( isallgap( aln0[i] ) ) allgap0[i] = 1;
else
{
alcount0++;
allgap0[i] = 0;
}
}
alcount2 = 0;
for( i=0; i<n2; i++ )
{
if( isallgap( aln2[i] ) ) allgap2[i] = 1;
else
{
alcount2++;
allgap2[i] = 0;
}
}
for( i=0; i<n0; i++ ) if( !allgap0[i] ) effarr0[i] = 1.0 / (double)( alcount0 );
for( i=0; i<n2; i++ ) if( !allgap2[i] ) effarr2[i] = 1.0 / (double)( alcount2 );
#else
eff = 1.0 / (double)n0; for( i=0; i<n0; i++ ) effarr0[i] = eff;
eff = 1.0 / (double)n2; for( i=0; i<n2; i++ ) effarr2[i] = eff;
#endif
newgapstr = "-";
if( alg == 'M' )
MSalignmm( n_dis_consweight_multi, aln0, aln2, effarr0, effarr2, n0, n2, alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, 1, 1, NULL, NULL, NULL, 0.0, 0.0 ); //outgap=1, 2014/Dec/1
else
A__align( n_dis_consweight_multi, penalty, penalty_ex, aln0, aln2, effarr0, effarr2, n0, n2, alloclen, 0, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, 1, 1, -1, -1, NULL, NULL, NULL, 0.0, 0.0 ); //outgap=1, 2014/Dec/1
newlen = strlen( aln0[0] );
#if 0 // tabun hitsuyou
for( j=0; j<newlen; j++ )
{
// fprintf( stderr, "j=%d\n", j );
for( i=0; i<n0; i++ )
{
if( aln0[i][j] != '-' ) break;
}
if( i == n0 )
{
for( i=0; i<n1; i++ )
{
if( aln1[i][j] != '-' ) break;
}
}
else i = -1;
if( i == n1 )
{
for( i=0; i<n1; i++ ) aln1[i][j] = '=';
}
}
fprintf( stderr, "in profilealignment,\n" );
for( i=0; i<n0; i++ ) fprintf( stderr, "\n>aln0[%d] = \n%s\n", i, aln0[i] );
for( i=0; i<n1; i++ ) fprintf( stderr, "\n>aln1[%d] = \n%s\n", i, aln1[i] );
for( i=0; i<n2; i++ ) fprintf( stderr, "\n>aln2[%d] = \n%s\n", i, aln2[i] );
#endif
free( effarr0 );
free( effarr2 );
free( allgap0 );
free( allgap2 );
}
static void profilealignment( int n0, int n1, int n2, char **aln0, char **aln1, char **aln2, int alloclen, char alg ) // n1 ha allgap
{
int i, j, newlen;
double *effarr0 = NULL, *effarr2 = NULL;
int *allgap0 = NULL, *allgap2 = NULL;
double dumdb;
int alcount0, alcount2;
char *cptr;
// effarr0 = AllocateDoubleVec( n0 );
// effarr2 = AllocateDoubleVec( n2 );
// allgap0 = AllocateIntVec( n0 );
// allgap2 = AllocateIntVec( n2 );
//
if( aln0[0][1] == 0 && aln2[0][1] == 0 ) return; // --allowshift no tokiha...
// reporterr( "In profilealignment(), strlen( aln0[0] ) %d\n", strlen( aln0[0] ) );
// reporterr( "In profilealignment(), strlen( aln2[0] ) %d\n", strlen( aln2[0] ) );
commongappick( n0, aln0 );
commongappick( n2, aln2 );
// reporterr( "after commongappick, strlen( aln0[0] ) %d\n", strlen( aln0[0] ) );
// reporterr( "after commongappick, strlen( aln2[0] ) %d\n", strlen( aln2[0] ) );
// reporterr( "\n\n\n" );
if( aln2[0][0] == 0 )
{
newlen = j = strlen( aln0[0] );
cptr = aln2[0];
while( j-- ) *cptr++ = '-';
*cptr = 0;
cptr = aln2[0];
for( i=1; i<n2; i++ ) strcpy( aln2[i], cptr );
return;
}
#if 1
effarr0 = (double *)malloc( n0 * sizeof( double ) );
effarr2 = (double *)malloc( n2 * sizeof( double ) );
allgap0 = (int *)malloc( n0 * sizeof( int ) );
allgap2 = (int *)malloc( n2 * sizeof( int ) );
#else
effarr0 = (double *)calloc( n0, sizeof( double ) );
effarr2 = (double *)calloc( n2, sizeof( double ) );
allgap0 = (int *)calloc( n0, sizeof( int ) );
allgap2 = (int *)calloc( n2, sizeof( int ) );
#endif
#if 1 // new weight 2015/Jun
alcount0 = 0;
for( i=0; i<n0; i++ )
{
if( isallgap( aln0[i] ) ) allgap0[i] = 1;
else
{
alcount0++;
allgap0[i] = 0;
}
}
alcount2 = 0;
for( i=0; i<n2; i++ )
{
if( isallgap( aln2[i] ) ) allgap2[i] = 1;
else
{
alcount2++;
allgap2[i] = 0;
}
}
for( i=0; i<n0; i++ ) if( !allgap0[i] ) effarr0[i] = 1.0 / (double)( alcount0 ); else effarr0[i] = 0.0; // malloc / alloca no baai
for( i=0; i<n2; i++ ) if( !allgap2[i] ) effarr2[i] = 1.0 / (double)( alcount2 ); else effarr2[i] = 0.0; // malloc / alloca no baai
#else
for( i=0; i<n0; i++ ) if( !allgap0[i] ) effarr0[i] = 1.0 / (double)( alcount0 ); // calloc no baai
for( i=0; i<n2; i++ ) if( !allgap2[i] ) effarr2[i] = 1.0 / (double)( alcount2 ); // calloc no baai
eff = 1.0 / (double)n0; for( i=0; i<n0; i++ ) effarr0[i] = eff;
eff = 1.0 / (double)n2; for( i=0; i<n2; i++ ) effarr2[i] = eff;
#endif
newgapstr = "-";
if( alg == 'M' )
MSalignmm( n_dis_consweight_multi, aln0, aln2, effarr0, effarr2, n0, n2, alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, 1, 1, NULL, NULL, NULL, 0.0, 0.0 ); //outgap=1, 2014/Dec/1
else
A__align( n_dis_consweight_multi, penalty, penalty_ex, aln0, aln2, effarr0, effarr2, n0, n2, alloclen, 0, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, 1, 1, -1, -1, NULL, NULL, NULL, 0.0, 0.0 ); //outgap=1, 2014/Dec/1
newlen = strlen( aln0[0] );
for( i=0; i<newlen; i++ ) aln1[0][i] = '-';
aln1[0][i] = 0;
for( i=1; i<n1; i++ ) strcpy( aln1[i], aln1[0] );
for( j=0; j<newlen; j++ )
{
// fprintf( stderr, "j=%d\n", j );
for( i=0; i<n0; i++ )
{
if( aln0[i][j] != '-' ) break;
}
if( i == n0 )
{
for( i=0; i<n1; i++ )
{
if( aln1[i][j] != '-' ) break;
}
}
else i = -1;
if( i == n1 )
{
for( i=0; i<n1; i++ ) aln1[i][j] = '=';
}
}
#if 0
fprintf( stderr, "in profilealignment, before commongappick\n" );
for( i=0; i<n0; i++ ) fprintf( stderr, "\n>aln0[%d] = %s\n", i, aln0[i] );
for( i=0; i<n1; i++ ) fprintf( stderr, "\n>aln1[%d] = %s\n", i, aln1[i] );
for( i=0; i<n2; i++ ) fprintf( stderr, "\n>aln2[%d] = %s\n", i, aln2[i] );
#endif
#if 0
fprintf( stderr, "in profilealignment, after commongappick\n" );
for( i=0; i<n0; i++ ) fprintf( stderr, "\n>aln0[%d] = %s\n", i, aln0[i] );
for( i=0; i<n1; i++ ) fprintf( stderr, "\n>aln1[%d] = %s\n", i, aln1[i] );
for( i=0; i<n2; i++ ) fprintf( stderr, "\n>aln2[%d] = %s\n", i, aln2[i] );
#endif
free( effarr0 );
free( effarr2 );
free( allgap0 );
free( allgap2 );
}
void eq2dashmatomete( char **s, int n )
{
int i, j;
char sj;
for( j=0; (sj=s[0][j]); j++ )
{
if( sj == '=' )
{
for( i=0; i<n; i++ )
{
s[i][j] = '-';
}
}
}
}
void eq2dashmatometehayaku( char **s, int n )
{
int i, j, c;
int *tobechanged;
int len = strlen( s[0] );
tobechanged = calloc( len+1, sizeof( int ) ); // len+1, 2017/Nov/15
c = 0;
for( j=0; j<len; j++ )
{
if( s[0][j] == '=' ) tobechanged[c++] = j;
}
tobechanged[c] = -1;
for( i=0; i<n; i++ )
{
for( c=0; (j=tobechanged[c])!=-1; c++ )
s[i][j] = '-';
}
free( tobechanged );
}
void eq2dash( char *s )
{
while( *s )
{
if( *s == '=' )
{
*s = '-';
}
s++;
}
}
static void plus2gapchar( char *s, char gapchar )
{
while( *s )
{
if( *s == '+' )
{
*s = gapchar;
}
s++;
}
}
void findnewgaps( int n, int rep, char **seq, int *gaplen )
{
int i, pos, len, len1;
len = strlen( seq[0] );
// for( i=0; i<len; i++ ) gaplen[i] = 0; // calloc de shokika sareteirukara hontou ha iranai
len1 = len + 1;
for( i=0; i<len1; i++ ) gaplen[i] = 0; // realloc de shokika sareteirukara iru!
pos = 0;
for( i=0; i<len; i++ )
{
if( seq[rep][i] == '=' )
{
if( disp ) fprintf( stderr, "Newgap! pos = %d\n", pos );
gaplen[pos]++;
}
else
pos++;
}
#if 0
if( disp )
{
fprintf( stderr, "\ngaplen[] in findnewgaps() = \n" );
for(i=0; i<pos; i++ ) fprintf( stderr, "%d ", gaplen[i] );
fprintf( stderr, "\n" );
reporterr( "pos=%d\n", pos );
}
#endif
}
void findcommongaps( int n, char **seq, int *gapmap )
{
int i, j, pos, len, len1;
len = strlen( seq[0] );
len1 = len+1;
// fprintf( stderr, "seq[0] = %s\n", seq[0] );
for( i=0; i<len1; i++ ) gapmap[i] = 0;
pos = 0;
for( i=0; i<len; i++ )
{
for( j=0; j<n; j++ )
if( seq[j][i] != '-' ) break;
if( j == n ) gapmap[pos]++;
else
pos++;
}
#if 0
for( i=0; i<pos; i++ )
{
fprintf( stderr, "vec[%d] = %d\n", i, gapmap[i] );
}
#endif
}
void adjustgapmap( int newlen, int *gapmap, char *seq )
{
int j;
int pos;
int newlen1 = newlen+1;
int *tmpmap;
tmpmap = AllocateIntVec( newlen+2 );
j = 0;
pos = 0;
while( *seq )
{
// fprintf( stderr, "j=%d *seq = %c\n", j, *seq );
if( *seq++ == '=' )
tmpmap[j++] = 0;
else
{
tmpmap[j++] = gapmap[pos++];
}
}
tmpmap[j++] = gapmap[pos];
for(j=0; j<newlen1; j++)
gapmap[j] = tmpmap[j];
free( tmpmap );
#if 0
reporterr( "gapmap in adjustgapmap() =\n" );
for(j=0; j<newlen1; j++) reporterr( "%d ", gapmap[j] );
reporterr( "length = %d\n", newlen );
#endif
}
static int countnogaplen( int *gaplen, int *term )
{
int v = 0;
while( gaplen < term )
{
if( *gaplen++ == 0 ) v++;
else break;
}
return( v );
}
static int countgapmap( int *gapmap, int *term )
{
int v = 0;
while( gapmap < term )
{
// reporterr( "*gapmap = %d\n", *gapmap );
if( *gapmap++ == 0 ) v++;
else break;
}
return( v );
}
void insertnewgaps( int njob, int *alreadyaligned, char **seq, int *ex1, int *ex2, int *gaplen, int *gapmap, int alloclen, char alg, char gapchar )
{
int *mar;
char *gaps;
char *cptr;
int i, j, k, len, rep, len0, lp, blocklen;
char **mseq2, **mseq0, **mseq1;
char **aseq, *newchar;
int ngroup2, ngroup0, ngroup1;
int *list0, *list1, *list2;
int posin12, gapshift, newpos;
int mlen1, mlen0, mlen2;
mar = calloc( njob, sizeof( int ) );
list0 = calloc( njob, sizeof( int ) );
list1 = calloc( njob, sizeof( int ) );
list2 = calloc( njob, sizeof( int ) );
for( i=0; i<njob; i++ ) mar[i] = 0;
for( i=0; i<njob; i++ )
{
if( alreadyaligned[i]==0 ) mar[i] = 3;
}
for( i=0; (k=ex1[i])>-1; i++ )
{
mar[k] = 1;
// fprintf( stderr, "excluding %d\n", ex1[i] );
}
for( i=0; (k=ex2[i])>-1; i++ )
{
mar[k] = 2;
// fprintf( stderr, "excluding %d\n", ex2[i] );
}
ngroup2 = ngroup1 = ngroup0 = 0;
for( i=0; i<njob; i++ )
{
if( mar[i] == 2 )
{
list2[ngroup2] = i;
ngroup2++;
}
if( mar[i] == 1 )
{
list1[ngroup1] = i;
ngroup1++;
}
if( mar[i] == 0 )
{
list0[ngroup0] = i;
// fprintf( stderr, "inserting new gaps to %d\n", i );
ngroup0++;
}
}
list0[ngroup0] = list1[ngroup1] = list2[ngroup2] = -1;
if( ngroup0 == 0 )
{
// fprintf( stderr, "Nothing to do\n" );
free( mar );
free( list0 );
free( list1 );
free( list2 );
return;
}
for( i=0; i<njob; i++ ) if( mar[i] == 0 ) break;
rep = i;
len = strlen( seq[rep] );
len0 = len+1;
//
// if( i == njob )
// {
//// fprintf( stderr, "Nothing to do\n" );
// free( mar );
// return;
// }
mseq2 = AllocateCharMtx( ngroup2, alloclen );
mseq1 = AllocateCharMtx( ngroup1, alloclen );
mseq0 = AllocateCharMtx( ngroup0, alloclen );
aseq = AllocateCharMtx( njob, alloclen );
gaps = calloc( alloclen, sizeof( char ) );
for( i=0; i<njob; i++ ) aseq[i][0] = 0;
newpos = 0;
posin12 = 0;
#if 0
fprintf( stderr, "\ngaplen[] = \n" );
for(i=0; i<len0; i++ ) fprintf( stderr, "%d", gaplen[i] );
fprintf( stderr, "\n" );
fprintf( stderr, "\ngapmap[] = \n" );
for(i=0; i<len0; i++ ) fprintf( stderr, "%d", gapmap[i] );
fprintf( stderr, "\n" );
#endif
for( j=0; j<len0; j++ )
{
// fprintf( stderr, "\nj=%d, gaplen[%d]=%d\n", j, j, gaplen[j] );
if( gaplen[j] )
{
// fprintf( stderr, "j=%d GAP!\n", j );
for( i=0; i<ngroup0; i++ ) mseq0[i][0] = 0;
for( i=0; i<ngroup1; i++ ) mseq1[i][0] = 0;
for( i=0; i<ngroup2; i++ ) mseq2[i][0] = 0;
mlen0 = mlen1 = mlen2 = 0;
gapshift = gaplen[j];
cptr = gaps;
while( gapshift-- ) *cptr++ = gapchar;
*cptr = 0;
gapshift = gaplen[j];
for( i=0; i<ngroup0; i++ ) strncpy0( mseq0[i]+mlen0, gaps, gapshift );
for( i=0; i<ngroup1; i++ ) strncpy0( mseq1[i]+mlen1, seq[list1[i]]+posin12, gapshift );
for( i=0; i<ngroup2; i++ ) strncpy0( mseq2[i]+mlen2, seq[list2[i]]+posin12, gapshift );
posin12 += gapshift;
mlen0 += gapshift;
mlen1 += gapshift;
mlen2 += gapshift;
gapshift = gapmap[posin12];
// fprintf( stderr, "gapmap[%d] kouho = %d\n", posin12, gapmap[posin12] );
for( i=0; i<ngroup0; i++ ) strncpy0( mseq0[i]+mlen0, seq[list0[i]]+j, gapshift );
for( i=0; i<ngroup1; i++ ) strncpy0( mseq1[i]+mlen1, seq[list1[i]]+posin12, gapshift );
for( i=0; i<ngroup2; i++ ) strncpy0( mseq2[i]+mlen2, seq[list2[i]]+posin12, gapshift );
mlen0 += gapshift;
mlen1 += gapshift;
mlen2 += gapshift;
#if 0
for( i=0; i<ngroup0; i++ ) fprintf( stderr, "### mseq0[%d] = %s\n", i, mseq0[i] );
for( i=0; i<ngroup1; i++ ) fprintf( stderr, "### mseq1[%d] = %s\n", i, mseq1[i] );
for( i=0; i<ngroup2; i++ ) fprintf( stderr, "### mseq2[%d] = %s\n", i, mseq2[i] );
#endif
if( gapshift )
{
// reporterr( "profilealignment (j=%d)!!!\n", j );
profilealignment( ngroup0, ngroup1, ngroup2, mseq0, mseq1, mseq2, alloclen, alg );
}
j += gapshift;
posin12 += gapshift;
newpos = strlen( aseq[rep] ); // kufuu?
for( i=0; i<ngroup0; i++ ) strcpy( aseq[list0[i]]+newpos, mseq0[i] );
for( i=0; i<ngroup1; i++ ) strcpy( aseq[list1[i]]+newpos, mseq1[i] );
for( i=0; i<ngroup2; i++ ) strcpy( aseq[list2[i]]+newpos, mseq2[i] );
// fprintf( stderr, "gapshift = %d\n", gapshift );
}
blocklen = 1 + countnogaplen( gaplen+j+1, gaplen+len0 );
// fprintf( stderr, "\nj=%d, blocklen=%d, len0=%d\n", j, blocklen, len0 );
// blocklen = 1;
newpos = strlen( aseq[rep] );
#if 0
for( i=0; i<ngroup0; i++ ) aseq[list0[i]][newpos] = seq[list0[i]][j];
for( i=0; i<ngroup1; i++ ) aseq[list1[i]][newpos] = seq[list1[i]][posin12];
for( i=0; i<ngroup2; i++ ) aseq[list2[i]][newpos] = seq[list2[i]][posin12];
for( i=0; i<ngroup0; i++ ) aseq[list0[i]][newpos+1] = 0;
for( i=0; i<ngroup1; i++ ) aseq[list1[i]][newpos+1] = 0;
for( i=0; i<ngroup2; i++ ) aseq[list2[i]][newpos+1] = 0;
#else
for( i=0; i<ngroup0; i++ )
{
lp = list0[i];
newchar = aseq[lp] + newpos;
strncpy0( newchar, seq[lp]+j, blocklen );
}
for( i=0; i<ngroup1; i++ )
{
lp = list1[i];
newchar = aseq[lp] + newpos;
strncpy0( newchar, seq[lp]+posin12, blocklen );
}
for( i=0; i<ngroup2; i++ )
{
lp = list2[i];
newchar = aseq[lp] + newpos;
strncpy0( newchar, seq[lp]+posin12, blocklen );
}
// fprintf( stderr, "### aseq[l0] = %s\n", aseq[list0[0]] );
// fprintf( stderr, "### aseq[l1] = %s\n", aseq[list1[0]] );
// fprintf( stderr, "### aseq[l2] = %s\n", aseq[list2[0]] );
#endif
// fprintf( stderr, "j=%d -> %d\n", j, j+blocklen-1 );
j += (blocklen-1);
posin12 += (blocklen-1);
posin12++;
}
#if 0
fprintf( stderr, "\n" );
for( i=0; i<ngroup0; i++ ) fprintf( stderr, " seq[l0i] = %s\n", seq[list0[i]] );
for( i=0; i<ngroup1; i++ ) fprintf( stderr, " seq[l1i] = %s\n", seq[list1[i]] );
for( i=0; i<ngroup2; i++ ) fprintf( stderr, " seq[l2i] = %s\n", seq[list2[i]] );
fprintf( stderr, "=====>\n" );
for( i=0; i<ngroup0; i++ ) fprintf( stderr, "aseq[l0i] = %s\n", aseq[list0[i]] );
for( i=0; i<ngroup1; i++ ) fprintf( stderr, "aseq[l1i] = %s\n", aseq[list1[i]] );
for( i=0; i<ngroup2; i++ ) fprintf( stderr, "aseq[l2i] = %s\n", aseq[list2[i]] );
#endif
// for( i=0; i<njob; i++ ) if( mar[i] != 3 ) strcpy( seq[i], aseq[i] );
for( i=0; i<ngroup0; i++ ) strcpy( seq[list0[i]], aseq[list0[i]] );
for( i=0; i<ngroup1; i++ ) strcpy( seq[list1[i]], aseq[list1[i]] );
for( i=0; i<ngroup2; i++ ) strcpy( seq[list2[i]], aseq[list2[i]] );
free( mar );
free( gaps );
free( list0 );
free( list1 );
free( list2 );
FreeCharMtx( mseq2 );
FreeCharMtx( mseq1 ); // ? added 2012/02/12
FreeCharMtx( mseq0 );
FreeCharMtx( aseq ); // ? added 2012/02/12
}
void insertnewgaps_bothorders( int njob, int *alreadyaligned, char **seq, int *ex1, int *ex2, int *gaplen, int *gapmap, int gapmaplen, int alloclen, char alg, char gapchar )
{
// int disp = 0;
int *mar;
char *gaps;
char *cptr;
int i, j, k, len, rep, len0, lp, blocklen, blockmap;
char **mseq2, **mseq0, **mseq1;
char **aseq, *newchar;
int ngroup2, ngroup0, ngroup1;
int *list0, *list1, *list2;
int posin12, gapshifta, gapshiftn, gapshiftb, newpos;
int mlen1, mlen0, mlen2;
int jinc;
#if 0 // for debug
int bug = 0;
char *nogapseq1, *nogapseq2;
nogapseq1 = calloc( alloclen, sizeof( char ) );
nogapseq2 = calloc( alloclen, sizeof( char ) );
#endif // for debug
mar = calloc( njob, sizeof( int ) );
list0 = calloc( njob, sizeof( int ) );
list1 = calloc( njob, sizeof( int ) );
list2 = calloc( njob, sizeof( int ) );
for( i=0; i<njob; i++ ) mar[i] = 0;
for( i=0; i<njob; i++ )
{
if( alreadyaligned[i]==0 ) mar[i] = 3;
}
for( i=0; (k=ex1[i])>-1; i++ )
{
mar[k] = 1;
// fprintf( stderr, "excluding %d\n", ex1[i] );
}
for( i=0; (k=ex2[i])>-1; i++ )
{
mar[k] = 2;
// fprintf( stderr, "excluding %d\n", ex2[i] );
}
ngroup2 = ngroup1 = ngroup0 = 0;
for( i=0; i<njob; i++ )
{
if( mar[i] == 2 )
{
list2[ngroup2] = i;
ngroup2++;
}
if( mar[i] == 1 )
{
list1[ngroup1] = i;
ngroup1++;
}
if( mar[i] == 0 )
{
list0[ngroup0] = i;
// fprintf( stderr, "inserting new gaps to %d\n", i );
ngroup0++;
}
}
list0[ngroup0] = list1[ngroup1] = list2[ngroup2] = -1;
if( ngroup0 == 0 )
{
// fprintf( stderr, "Nothing to do\n" );
free( mar );
free( list0 );
free( list1 );
free( list2 );
return;
}
for( i=0; i<njob; i++ ) if( mar[i] == 0 ) break;
rep = i;
len = strlen( seq[rep] );
len0 = len+1;
// reporterr( "alloclen = %d\n", alloclen );
// reporterr( "len0 = %d\n", strlen( seq[list0[0]] ) );
// reporterr( "len1 = %d\n", strlen( seq[list1[0]] ) );
// reporterr( "gapmaplen = %d\n", gapmaplen );
// reporterr( "ng0, ng1, ng2 = %d, %d, %d\n", ngroup0, ngroup1, ngroup2 );
//
// if( i == njob )
// {
//// fprintf( stderr, "Nothing to do\n" );
// free( mar );
// return;
// }
mseq2 = AllocateCharMtx( ngroup2, alloclen );
mseq1 = AllocateCharMtx( ngroup1, alloclen );
mseq0 = AllocateCharMtx( ngroup0, alloclen );
aseq = AllocateCharMtx( njob, alloclen );
gaps = calloc( alloclen, sizeof( char ) );
for( i=0; i<njob; i++ ) aseq[i][0] = 0;
newpos = 0;
posin12 = 0;
#if 0
if( disp )
{
int p;
fprintf( stderr, "\n" );
fprintf( stderr, "len0 = %d\n", len0 );
fprintf( stderr, "\ngaplen[] = \n" );
reporterr( "seq0[0] = %s\n", seq[list0[0]] );
reporterr( "seq1[1] = %s\n", seq[list1[0]] );
reporterr( "seq2[2] = %s\n", seq[list2[0]] );
reporterr( "seq[rep] = %s\n", seq[rep] );
for(i=0,p=0; i<gapmaplen; i++,p++ )
{
fprintf( stderr, "gaplen %d:%d %-*.*s\n", p, gaplen[p], gaplen[p]+1, gaplen[p]+1, seq[list1[0]]+i );
i += gaplen[p];
}
}
fprintf( stderr, "\n" );
fprintf( stderr, "\ngapmap[] = \n" );
for(i=0; i<gapmaplen; i++ ) fprintf( stderr, "gapmap %d:%d %-*.*s\n", i, gapmap[i], gapmap[i]+1, gapmap[i]+1, seq[list1[0]]+i );
reporterr( "seq1 = \n" );
reporterr( "%s\n", seq[list1[0]] );
#endif
for( j=0; j<len0; j++ )
{
// fprintf( stderr, "\nj=%d, gaplen[%d]=%d\n", j, j, gaplen[j] );
// if( gaplen[j] || gapmap[posin12] )
if( gaplen[j] || gapmap[posin12] )
{
// fprintf( stderr, "j=%d GAP!\n", j );
for( i=0; i<ngroup0; i++ ) mseq0[i][0] = 0;
for( i=0; i<ngroup1; i++ ) mseq1[i][0] = 0;
for( i=0; i<ngroup2; i++ ) mseq2[i][0] = 0;
mlen0 = mlen1 = mlen2 = 0;
gapshiftb = gapmap[posin12];
// fprintf( stderr, "\ngapmap[%d] kouho = %d, posint12 = %d\n", posin12, gapmap[posin12], posin12 );
if( gapshiftb ) // koko ga osoi!
{
for( i=0; i<ngroup0; i++ ) strncpy0( mseq0[i]+mlen0, seq[list0[i]]+j, gapshiftb ); // tokuni koko!
for( i=0; i<ngroup1; i++ ) strncpy0( mseq1[i]+mlen1, seq[list1[i]]+posin12, gapshiftb );
for( i=0; i<ngroup2; i++ ) strncpy0( mseq2[i]+mlen2, seq[list2[i]]+posin12, gapshiftb );
mlen0 += gapshiftb;
mlen1 += gapshiftb;
mlen2 += gapshiftb;
#if 0
reporterr( "\n\n" );
for( i=0; i<1; i++ ) fprintf( stderr, "##b mseq0[%d] = %s\n", i, mseq0[i] );
for( i=0; i<1; i++ ) fprintf( stderr, "##b mseq1[%d] = %s\n", i, mseq1[i] );
for( i=0; i<1; i++ ) fprintf( stderr, "##b mseq2[%d] = %s\n", i, mseq2[i] );
#endif
posin12 += gapshiftb;
}
#if 0 // nen no tame
for( i=0, jinc=0; i<gapshiftb; i++ ) jinc += 1+gaplen[j+i];
if( jinc != gapshiftb )
{
reporterr( "\n#################!\n" );
reporterr( "# Unexpected gap pattern!\n" );
reporterr( "# There are overlapped %d gaps in gaplen[] and gapmap[]. j=%d-%d, posin12=%d-%d\n", jinc, j-gapshiftb-jinc, j, posin12-gapshiftb, posin12 );
reporterr( "\n#################!\n" );
exit( 1 );
}
j += gapshiftb;
#else
j += gapshiftb;
#endif
#if 0
if( disp && gapshiftb )
{
reporterr( "after gapshiftb, j=%d, posin12=%d\n", j, posin12 );
reporterr( "mseq0[0] = %s\n", mseq0[0] );
reporterr( "mseq1[0] = %s\n", mseq1[0] );
reporterr( "mseq2[0] = %s\n", mseq2[0] );
}
#endif
// fprintf( stderr, "gaplen[%d]=%d, posin12 = %d\n", j, gaplen[j], posin12 );
while( 1 )
{
gapshiftn = gaplen[j];
if( gapshiftn )
{
cptr = gaps;
while( gapshiftn-- ) *cptr++ = gapchar;
*cptr = 0;
gapshiftn = gaplen[j];
for( i=0; i<ngroup0; i++ ) strncpy0( mseq0[i]+mlen0, gaps, gapshiftn );
for( i=0; i<ngroup1; i++ ) strncpy0( mseq1[i]+mlen1, seq[list1[i]]+posin12, gapshiftn );
for( i=0; i<ngroup2; i++ ) strncpy0( mseq2[i]+mlen2, seq[list2[i]]+posin12, gapshiftn );
posin12 += gapshiftn;
mlen0 += gapshiftn;
mlen1 += gapshiftn;
mlen2 += gapshiftn;
#if 0
for( i=0; i<1; i++ ) fprintf( stderr, "##n mseq0[%d] = %s\n", i, mseq0[i] );
for( i=0; i<1; i++ ) fprintf( stderr, "##n mseq1[%d] = %s\n", i, mseq1[i] );
for( i=0; i<1; i++ ) fprintf( stderr, "##n mseq2[%d] = %s\n", i, mseq2[i] );
#endif
}
#if 0
if( disp && gapshiftn )
{
reporterr( "after gapshiftn (j=%d, gaplen[j]=%d, posin12=%d, gapshiftn=%d)\n", j, gaplen[j], posin12-gapshiftn, gapshiftn );
reporterr( "mseq0[0] = %s\n", mseq0[0] );
reporterr( "mseq1[0] = %s\n", mseq1[0] );
reporterr( "mseq2[0] = %s\n", mseq2[0] );
}
#endif
gapshifta = gapmap[posin12];
// fprintf( stderr, "gapmap[%d] kouho = %d, posin12 = %d\n", posin12, gapmap[posin12], posin12 );
if( gapshifta )
{
for( i=0; i<ngroup0; i++ ) strncpy0( mseq0[i]+mlen0, seq[list0[i]]+j, gapshifta );
for( i=0; i<ngroup1; i++ ) strncpy0( mseq1[i]+mlen1, seq[list1[i]]+posin12, gapshifta );
for( i=0; i<ngroup2; i++ ) strncpy0( mseq2[i]+mlen2, seq[list2[i]]+posin12, gapshifta );
mlen0 += gapshifta;
mlen1 += gapshifta;
mlen2 += gapshifta;
posin12 += gapshifta;
#if 0
for( i=0; i<1; i++ ) fprintf( stderr, "##a mseq0[%d] = %s\n", i, mseq0[i] );
for( i=0; i<1; i++ ) fprintf( stderr, "##a mseq1[%d] = %s\n", i, mseq1[i] );
for( i=0; i<1; i++ ) fprintf( stderr, "##a mseq2[%d] = %s\n", i, mseq2[i] );
#endif
}
#if 0
j += gapshifta; // BUG!!
#else
// for( i=1, jinc=1; i<gapshifta; i++ ) jinc += 1+gaplen[j+i];
for( i=1, jinc=0; i<=gapshifta; i++ ) jinc += 1+gaplen[j+i];
// j += jinc;
j += gapshifta;
if( jinc == gapshifta ) break;
reporterr( "(a) There are overlapped %d gaps in gaplist[] and gapmap[]. j=%d-%d, posin12=%d-%d, jinc=%d, gapshifta=%d\n", jinc, j-gapshifta-jinc, j, posin12-gapshifta, posin12, jinc, gapshifta );
#endif
}
#if 0
if( disp && gapshifta )
{
reporterr( "after gapshifta, j=%d, posin12=%d\n", j, posin12 );
reporterr( "mseq0[0] = %s\n", mseq0[0] );
reporterr( "mseq1[0] = %s\n", mseq1[0] );
reporterr( "mseq2[0] = %s\n", mseq2[0] );
}
#endif
if( gapshiftb + gapshifta )
{
#if 0
for( i=0; i<1; i++ ) fprintf( stderr, "### mseq0[%d] = %s\n", i, mseq0[i] );
for( i=0; i<1; i++ ) fprintf( stderr, "### mseq1[%d] = %s\n", i, mseq1[i] );
for( i=0; i<1; i++ ) fprintf( stderr, "### mseq2[%d] = %s\n", i, mseq2[i] );
#endif
// if( disp ) reporterr( "profilealignment (j=%d)!!!\n", j );
profilealignment( ngroup0, ngroup1, ngroup2, mseq0, mseq1, mseq2, alloclen, alg );
}
newpos = strlen( aseq[rep] ); // kufuu?
for( i=0; i<ngroup0; i++ ) strcpy( aseq[list0[i]]+newpos, mseq0[i] );
for( i=0; i<ngroup1; i++ ) strcpy( aseq[list1[i]]+newpos, mseq1[i] );
for( i=0; i<ngroup2; i++ ) strcpy( aseq[list2[i]]+newpos, mseq2[i] );
#if 0
if( disp )
{
reporterr( "after profilealignment\n" );
reporterr( "mseq0[0] = %s\n", mseq0[0] );
reporterr( "mseq1[0] = %s\n", mseq1[0] );
reporterr( "mseq2[0] = %s\n", mseq2[0] );
gappick0equalminustmptmptmp( nogapseq1, aseq[list1[0]] );
gappick0equalminustmptmptmp( nogapseq2, seq[list1[0]] );
reporterr( "aseq[list1[0].nogap = %s\n", nogapseq1 );
reporterr( " seq[list1[0].nogap = %s\n", nogapseq2 );
}
#endif
// fprintf( stderr, "gapshift = %d\n", gapshift );
}
newpos = strlen( aseq[rep] );
blocklen = 1 + countnogaplen( gaplen+j+1, gaplen+len0 );
// fprintf( stderr, "\nj=%d, blocklen=%d, len0=%d\n", j, blocklen, len0 );
blockmap = 1 + countgapmap( gapmap+posin12+1, gapmap+gapmaplen );
// fprintf( stderr, "posin12=%d, blockmap=%d, len0=%d\n", posin12, blockmap, len0 );
// if( disp ) reporterr( "newpos = %d, blocklen = %d, blockmap = %d, j=%d, posin12=%d\n", newpos, blocklen, blockmap, j, posin12 );
if( blockmap < blocklen ) blocklen = blockmap;
#if 0
for( i=0; i<ngroup0; i++ ) aseq[list0[i]][newpos] = seq[list0[i]][j];
for( i=0; i<ngroup1; i++ ) aseq[list1[i]][newpos] = seq[list1[i]][posin12];
for( i=0; i<ngroup2; i++ ) aseq[list2[i]][newpos] = seq[list2[i]][posin12];
for( i=0; i<ngroup0; i++ ) aseq[list0[i]][newpos+1] = 0;
for( i=0; i<ngroup1; i++ ) aseq[list1[i]][newpos+1] = 0;
for( i=0; i<ngroup2; i++ ) aseq[list2[i]][newpos+1] = 0;
#else
// if( j >= len0 ) break; // iru?
for( i=0; i<ngroup0; i++ )
{
lp = list0[i];
newchar = aseq[lp] + newpos;
strncpy0( newchar, seq[lp]+j, blocklen );
// *(newchar+blocklen) = 0; iranai
}
for( i=0; i<ngroup1; i++ )
{
lp = list1[i];
newchar = aseq[lp] + newpos;
strncpy0( newchar, seq[lp]+posin12, blocklen );
// *(newchar+blocklen) = 0; iranai
}
for( i=0; i<ngroup2; i++ )
{
lp = list2[i];
newchar = aseq[lp] + newpos;
strncpy0( newchar, seq[lp]+posin12, blocklen );
// *(newchar+blocklen) = 0; iranai
}
// reporterr( "adding %c to aseq[list1[0]]\n", seq[list1[0]][posin12] );
// for( i=0; i<ngroup0; i++ ) fprintf( stderr, "### aseq0[%d] = %s\n", i, aseq[list0[i]] );
// for( i=0; i<ngroup1; i++ ) fprintf( stderr, "### aseq1[%d] = %s\n", i, aseq[list1[i]] );
// for( i=0; i<ngroup2; i++ ) fprintf( stderr, "### aseq2[%d] = %s\n", i, aseq[list2[i]] );
#endif
// fprintf( stderr, "j=%d -> %d\n", j, j+blocklen-1 );
j += (blocklen-1);
// j += gaplen[j];
posin12 += (blocklen-1); // sono aida ni gapmap wo miotosu?
posin12++;
#if 0
if( disp )
{
gappick0equalminustmptmptmp( nogapseq1, aseq[list1[0]] );
gappick0equalminustmptmptmp( nogapseq2, seq[list1[0]] );
reporterr( "aseq[list1[0].nogap = %s\n", nogapseq1 );
reporterr( " seq[list1[0].nogap = %s\n", nogapseq2 );
reporterr( "" );
// reporterr( "seq[list1[0]] = %s\n", seq[list1[0]] );
// reporterr( "seq[list2[0]] = %s\n", seq[list2[0]] );
}
#endif
}
#if 0
fprintf( stderr, "\n" );
for( i=0; i<ngroup0; i++ ) fprintf( stderr, " seq[l0i] = \n%s\n", seq[list0[i]] );
for( i=0; i<ngroup1; i++ ) fprintf( stderr, " seq[l1i] = \n%s\n", seq[list1[i]] );
for( i=0; i<ngroup2; i++ ) fprintf( stderr, " seq[l2i] = \n%s\n", seq[list2[i]] );
reporterr( "0 1 2 3 4 5 6 7 \n" );
reporterr( "012345678901234567890123456789012345678901234567890123456789012345678901234567\n" );
fprintf( stderr, "=====>\n" );
for( i=0; i<ngroup0; i++ ) fprintf( stderr, "aseq[l0i] = \n%s\n", aseq[list0[i]] );
for( i=0; i<ngroup1; i++ ) fprintf( stderr, "aseq[l1i] = \n%s\n", aseq[list1[i]] );
for( i=0; i<ngroup2; i++ ) fprintf( stderr, "aseq[l2i] = \n%s\n", aseq[list2[i]] );
#endif
#if 0
reporterr( "list0[0]=%d\n", list0[0] );
reporterr( "list0[%d-1]=%d\n", ngroup0, list0[ngroup0-1] );
reporterr( "seq0[list[0]]=%s\n", seq[list0[0]] );
reporterr( "aseq0[list[0]]=%s\n", aseq[list0[0]] );
#endif
#if 0
for( i=0; i<ngroup0; i++ )
{
if( strlen( aseq[list0[0]] ) != strlen( aseq[list0[i]] ) )
{
reporterr( "Length error! len[0] = %d, but len[%d] = %d\n", strlen( aseq[list0[0]] ), list0[i], strlen( aseq[list0[i]] ) );
bug = 1;
break;
}
}
for( i=0; i<ngroup0; i++ )
{
gappick0equalminustmptmptmp( nogapseq1, aseq[list0[i]] );
gappick0equalminustmptmptmp( nogapseq2, seq[list0[i]] );
if( strcmp( nogapseq1, nogapseq2 ) ) bug = 1;
}
for( i=0; i<ngroup1; i++ )
{
gappick0equalminustmptmptmp( nogapseq1, aseq[list1[i]] );
gappick0equalminustmptmptmp( nogapseq2, seq[list1[i]] );
if( strcmp( nogapseq1, nogapseq2 ) ) bug = 1;
}
for( i=0; i<ngroup2; i++ )
{
gappick0equalminustmptmptmp( nogapseq1, aseq[list2[i]] );
gappick0equalminustmptmptmp( nogapseq2, seq[list2[i]] );
if( strcmp( nogapseq1, nogapseq2 ) ) bug = 1;
}
free( nogapseq1 );
free( nogapseq2 );
if( bug )
{
reporterr( "ERROR!!!!!!!\n" );
reporterr( ">aseq1[%d], len = %d\n%s\n", list1[0], strlen( aseq[list1[0]] ), aseq[list0[0]] );
reporterr( ">seq1[%d], len = %d\n%s\n", list1[0], strlen( seq[list1[0]] ), seq[list0[i]] );
exit( 1 );
for( i=0; i<ngroup0; i++ ) reporterr( ">aseq0[%d], len = %d\n%s\n", list0[i], strlen( aseq[list0[i]] ), aseq[list0[i]] );
for( i=0; i<ngroup1; i++ ) reporterr( ">aseq1[%d], len = %d\n%s\n", list1[i], strlen( aseq[list1[i]] ), aseq[list1[i]] );
for( i=0; i<ngroup2; i++ ) reporterr( ">aseq2[%d], len = %d\n%s\n", list2[i], strlen( aseq[list2[i]] ), aseq[list2[i]] );
for( i=0; i<ngroup0; i++ ) reporterr( ">seq0[%d], len = %d\n%s\n", list0[i], strlen( seq[list0[i]] ), seq[list0[i]] );
for( i=0; i<ngroup1; i++ ) reporterr( ">seq1[%d], len = %d\n%s\n", list1[i], strlen( seq[list1[i]] ), seq[list1[i]] );
for( i=0; i<ngroup2; i++ ) reporterr( ">seq2[%d], len = %d\n%s\n", list2[i], strlen( seq[list2[i]] ), seq[list2[i]] );
exit( 1 );
}
#endif
// for( i=0; i<njob; i++ ) if( mar[i] != 3 ) strcpy( seq[i], aseq[i] );
for( i=0; i<ngroup0; i++ ) strcpy( seq[list0[i]], aseq[list0[i]] );
for( i=0; i<ngroup1; i++ ) strcpy( seq[list1[i]], aseq[list1[i]] );
for( i=0; i<ngroup2; i++ ) strcpy( seq[list2[i]], aseq[list2[i]] );
free( mar );
free( gaps );
free( list0 );
free( list1 );
free( list2 );
FreeCharMtx( mseq2 );
FreeCharMtx( mseq1 ); // ? added 2012/02/12
FreeCharMtx( mseq0 );
FreeCharMtx( aseq ); // ? added 2012/02/12
}
static void reflectsmoothing( char *ref, int *mem, char **seq, int len )
{
char *tmpseq;
int i, j, k, p;
// reporterr( "#### reflectsmoothing!!!!!\n" );
// if( mem[1] != -1 ) reporterr( "original = %s\n", seq[mem[1]] );
tmpseq = calloc( len+1, sizeof( char ) );
for( j=1; (i=mem[j])!=-1; j++ )
{
eqpick( tmpseq, seq[i] );
for( k=0, p=0; p<len; k++ )
{
while( ref[p] == '=' ) seq[i][p++] = '=';
seq[i][p++] = tmpseq[k];
}
}
free( tmpseq );
// if( mem[1] != -1 ) reporterr( "output = %s\n", seq[mem[1]] );
// reporterr( "#### done!!!!!\n" );
}
static int smoothing1rightmulti( int len, char *ref ) // osoi!
{
int i, j, k;
int shiftfrom = -1;
int shiftto = -1;
int *hit;
int val = 0, nhit = 0;
hit = NULL;
// reporterr( "ref (1rightmulti) = %s\n", ref );
for( i=1, nhit=0; i<len-1; i++ ) // break nashi no baai, hidarihaji ha saigo
// for( i=len-2; i>0; i-- ) // break ari no baai, migihajiha saigo
{
if( ref[i-1] == '+' && ( ref[i] != '+' && ref[i] != '=' ) && ref[i+1] == '=' )
{
// reporterr( "hit! i=%d, len=%d\n", i, len );
hit = realloc( hit, (nhit+1) * sizeof( int ) );
hit[nhit] = i;
nhit += 1;
// break;
}
}
if( nhit == 0 ) return( 0 );
for( k=0; k<nhit; k++ )
{
for( j=hit[k]+1; j<=len; j++ )
{
if( ref[j] != '=' )
{
shiftto = j-1;
break;
}
}
if( j == len && ref[len-1] == '=' )
{
reporterr( "hit[i].end = %d, j = len-1, skip!\n" );
continue;
}
if( shiftto < len-1 && ref[shiftto+1] == '+' ) continue; // muda dakara
val += 1;
shiftfrom = hit[k];
if( ref[shiftto] != '=' ) // atode sakujo
{
reporterr( "Error in smoothing1left!\n" );
exit( 1 );
}
ref[shiftto] = ref[shiftfrom];
ref[shiftfrom] = '=';
}
free( hit );
// reporterr( "ref (1rightmulti) = %s\n", ref );
reporterr( " %d out of %d have been smoothed (right).\n", val, nhit );
// if( nhit > 1 ) exit( 1 );
return( val );
}
static int smoothing1leftmulti( int len, char *ref ) // osoi!
{
int i, j, k;
int shiftfrom = -1;
int shiftto = -1;
int *hit;
int val = 0, nhit = 0;
hit = NULL;
// reporterr( "ref (1leftmulti) = %s\n", ref );
for( i=1, nhit=0; i<len-1; i++ ) // break nashi no baai, hidarihaji ha saigo
// for( i=len-2; i>0; i-- ) // break ari no baai, migihajiha saigo
{
if( ref[i-1] == '=' && ( ref[i] != '+' && ref[i] != '=' ) && ref[i+1] == '+' )
{
// reporterr( "hit! i=%d, len=%d\n", i, len );
hit = realloc( hit, (nhit+1) * sizeof( int ) );
hit[nhit] = i;
nhit += 1;
// break;
}
}
if( nhit == 0 ) return( 0 );
for( k=0; k<nhit; k++ )
{
for( j=hit[k]-1; j>-1; j-- )
{
if( ref[j] != '=' )
{
shiftto = j+1;
break;
}
}
if( j == -1 && ref[0] == '=' )
{
reporterr( "hit[i].end = %d, j = -1, skip!\n" );
continue;
}
if( shiftto > 0 && ref[shiftto-1] == '+' ) continue; // muda dakara
val += 1;
shiftfrom = hit[k];
if( ref[shiftto] != '=' ) // atode sakujo
{
reporterr( "Error in smoothing1left!\n" );
exit( 1 );
}
ref[shiftto] = ref[shiftfrom];
ref[shiftfrom] = '=';
}
free( hit );
// reporterr( "ref (1leftmulti) = %s\n", ref );
reporterr( " %d out of %d have been smoothed (left).\n", val, nhit );
// if( nhit > 1 ) exit( 1 );
return( val );
}
void restorecommongapssmoothly( int njob, int n0, char **seq, int *ex1, int *ex2, int *gapmap, int alloclen, char gapchar )
{
int *mem;
char *tmpseq;
char *cptr;
int *iptr;
int *tmpgapmap;
int i, j, k, len, rep1, rep2, len1, klim, leninserted;
int totalres;
if( n0 == 0 ) return;
mem = calloc( njob+1, sizeof( int ) ); // +1 ha iranai.
intcpy( mem, ex1 );
intcat( mem, ex2 );
// tmpseq = calloc( alloclen+2, sizeof( char ) );
// tmpgapmap = calloc( alloclen+2, sizeof( int ) );
#if 0 // iranai
for( i=0; (k=mem[i])!=-1; i++ ) // iranai
reporterr( "mem[%d] = %d\n", i, k ); // iranai
if( i == njob ) // iranai
{
fprintf( stderr, "Error in restorecommongaps()\n" );
free( mem );
exit( 1 );
}
#endif
rep1 = ex1[0];
rep2 = ex2[0];
len = strlen( seq[rep1] );
len1 = len+1;
tmpseq = calloc( alloclen, sizeof( char ) );
tmpgapmap = calloc( alloclen, sizeof( int ) );
#if 0
reporterr( "\n" );
reporterr( "seq[rep1] = %s\n", seq[rep1] );
reporterr( "seq[rep2] = %s\n", seq[rep2] );
#endif
for( k=0; (i=mem[k])!=-1; k++ )
{
cptr = tmpseq;
for( j=0; j<len1; j++ )
{
klim = gapmap[j];
// for( k=0; k<gapmap[j]; k++ )
while( klim-- )
*(cptr++) = '+'; // ???
*(cptr++) = seq[i][j];
}
*cptr = 0;
strcpy( seq[i], tmpseq );
}
#if 0
reporterr( "->\n" );
reporterr( "seq[rep1] = \n%s\n", seq[rep1] );
reporterr( "seq[rep2] = \n%s\n", seq[rep2] );
#endif
leninserted = strlen( seq[rep1] );
#if 0
reporterr( "gapmap =\n" );
for(j=0; j<len1; j++)
{
reporterr( "%d", gapmap[j] );
for( i=gapmap[j]; i>0; i-- ) reporterr( "-" );
}
reporterr( "\n" );
#endif
#if 0
resprev = 10000; // tekitou
while( 1 )
{
res = 0;
// reporterr( "\nsmoothing1right..\n" );
res = (0<smoothing1right( leninserted, seq[rep1], gapmap, seq, ex1 ));
// reporterr( "done. res = %d\n", res );
// reporterr( "smoothing1right..\n" );
res += (0<smoothing1right( leninserted, seq[rep2], gapmap, seq, ex2 ));
// reporterr( "done. res = %d\n", res );
// reporterr( "smoothing1left..\n" );
res += (0<smoothing1left( leninserted, seq[rep1], gapmap, seq, ex1 ));
// reporterr( "done. res = %d\n", res );
// reporterr( "smoothing1left..\n" );
res += (0<smoothing1left( leninserted, seq[rep2], gapmap, seq, ex2 ));
// reporterr( "done. res = %d\n", res );
reporterr( " Smoothing .. %d \n", res );
if( res >= resprev ) break;
// if( res == 0 ) break;
resprev = res;
}
#else
totalres = 0;
totalres += smoothing1rightmulti( leninserted, seq[rep1] );
totalres += smoothing1leftmulti( leninserted, seq[rep1] );
if( totalres ) reflectsmoothing( seq[rep1], ex1, seq, leninserted );
totalres = 0;
totalres += smoothing1rightmulti( leninserted, seq[rep2] );
totalres += smoothing1leftmulti( leninserted, seq[rep2] );
if( totalres ) reflectsmoothing( seq[rep2], ex2, seq, leninserted );
#endif
for( k=0; (i=mem[k])!=-1; k++ ) plus2gapchar( seq[i], gapchar );
#if 0
reporterr( "->\n" );
reporterr( "seq[rep1] = \n%s\n", seq[rep1] );
reporterr( "seq[rep2] = \n%s\n", seq[rep2] );
reporterr( "gapmap =\n" );
for(j=0; j<len1; j++)
{
reporterr( "%d", gapmap[j] );
for( i=gapmap[j]; i>0; i-- ) reporterr( "-" );
}
reporterr( "\n" );
#endif
iptr = tmpgapmap;
for( j=0; j<len1; j++ )
{
*(iptr++) = gapmap[j];
for( k=0; k<gapmap[j]; k++ )
*(iptr++) = 0;
}
*iptr = -1;
intcpy( gapmap, tmpgapmap );
// iptr = tmpgapmap;
// while( *iptr != -1 ) *gapmap++ = *iptr++;
free( mem );
free( tmpseq );
free( tmpgapmap );
}
void restorecommongaps( int njob, int n0, char **seq, int *ex1, int *ex2, int *gapmap, int alloclen, char gapchar )
{
int *mem;
char *tmpseq;
char *cptr;
int *iptr;
int *tmpgapmap;
int i, j, k, len, rep, len1, klim;
if( n0 == 0 ) return;
mem = calloc( njob+1, sizeof( int ) ); // +1 ha iranai.
intcpy( mem, ex1 );
intcat( mem, ex2 );
// tmpseq = calloc( alloclen+2, sizeof( char ) );
// tmpgapmap = calloc( alloclen+2, sizeof( int ) );
#if 0 // iranai
for( i=0; (k=mem[i])!=-1; i++ ) // iranai
reporterr( "mem[%d] = %d\n", i, k ); // iranai
if( i == njob ) // iranai
{
fprintf( stderr, "Error in restorecommongaps()\n" );
free( mem );
exit( 1 );
}
#endif
rep = mem[0];
len = strlen( seq[rep] );
len1 = len+1;
tmpseq = calloc( alloclen, sizeof( char ) );
tmpgapmap = calloc( alloclen, sizeof( int ) );
for( k=0; (i=mem[k])!=-1; k++ )
{
cptr = tmpseq;
for( j=0; j<len1; j++ )
{
klim = gapmap[j];
// for( k=0; k<gapmap[j]; k++ )
while( klim-- )
*(cptr++) = gapchar; // ???
*(cptr++) = seq[i][j];
}
*cptr = 0;
strcpy( seq[i], tmpseq );
}
iptr = tmpgapmap;
for( j=0; j<len1; j++ )
{
*(iptr++) = gapmap[j];
for( k=0; k<gapmap[j]; k++ )
*(iptr++) = 0;
}
*iptr = -1;
iptr = tmpgapmap;
while( *iptr != -1 ) *gapmap++ = *iptr++;
free( mem );
free( tmpseq );
free( tmpgapmap );
}
int deletenewinsertions_whole_eq( int on, int an, char **oseq, char **aseq, int **deletelist )
{
int i, j, p, q, allgap, ndel;
int len = strlen( oseq[0] );
char *eqseq, tmpc;
// reporterr( "In deletenewinsertions_whole_eq\n" );
// for( j=0; j<on; j++ ) reporterr( "\no=%s\n", oseq[j] );
// for( j=0; j<an; j++ ) reporterr( "a=%s\n", aseq[j] );
eqseq = calloc( len+1, sizeof( char ) );
for( i=0; i<len; i++ )
{
allgap = 0;
for( j=0; j<on; j++ )
{
tmpc = oseq[j][i];
if( tmpc != '-' && tmpc != '=' ) break;
}
if( j == on )
allgap = 1;
if( allgap )
{
eqseq[i] = '=';
}
else
{
eqseq[i] = 'o';
}
}
// for( j=0; j<1; j++ ) reporterr( "\no = %s\n", oseq[j] );
// reporterr( "\ne = %s\n", eqseq );
// for( j=0; j<1; j++ ) reporterr( "a = %s\n", aseq[j] );
if( deletelist )
{
for( j=0; j<an; j++ )
{
ndel = 0;
for( i=0,q=0; i<len; i++ )
{
tmpc = aseq[j][i];
if( tmpc != '-' && tmpc != '=' )
{
if( eqseq[i] == '=' )
{
// reporterr( "deleting %d-%d, %c\n", j, i, aseq[j][i] );
deletelist[j] = realloc( deletelist[j], sizeof( int ) * (ndel+2) );
deletelist[j][ndel] = q;
ndel++;
}
q++;
}
}
deletelist[j][ndel] = -1;
}
}
for( i=0,p=0; i<len; i++ )
{
// if( oseq[0][i] != '=' )
// reporterr( "i=%d, p=%d, q=%d, originally, %c\n", i, p, q, originallygapped[p]);
// if( eqseq[i] != '=' && originallygapped[p] != '-' ) // dame!!
if( eqseq[i] != '=' )
{
// reporterr( "COPY! p=%d\n", p );
if( p != i )
{
for( j=0; j<on; j++ ) oseq[j][p] = oseq[j][i];
for( j=0; j<an; j++ ) aseq[j][p] = aseq[j][i];
}
p++;
}
}
// reporterr( "deletemap = %s\n", deletemap );
// reporterr( "eqseq = %s\n", eqseq );
// reporterr( "originallygapped = %s\n", originallygapped );
for( j=0; j<on; j++ ) oseq[j][p] = 0;
for( j=0; j<an; j++ ) aseq[j][p] = 0;
free( eqseq );
// for( j=0; j<on; j++ ) reporterr( "\no=%s\n", oseq[j] );
// for( j=0; j<an; j++ ) reporterr( "a=%s\n", aseq[j] );
return( i-p );
}
int deletenewinsertions_whole( int on, int an, char **oseq, char **aseq, int **deletelist )
{
int i, j, p, q, allgap, ndel;
int len = strlen( oseq[0] );
char *eqseq, tmpc;
// reporterr( "In deletenewinsertions_whole\n" );
// for( j=0; j<on; j++ ) reporterr( "\no=%s\n", oseq[j] );
// for( j=0; j<an; j++ ) reporterr( "a=%s\n", aseq[j] );
eqseq = calloc( len+1, sizeof( char ) );
for( i=0,p=0; i<len; i++ )
{
allgap = 0;
for( j=0; j<on; j++ )
{
tmpc = oseq[j][i];
if( tmpc != '-' ) break;
}
if( j == on )
allgap = 1;
if( allgap )
{
eqseq[i] = '=';
}
else
{
eqseq[i] = 'o';
}
}
// for( j=0; j<1; j++ ) reporterr( "\no = %s\n", oseq[j] );
// reporterr( "\ne = %s\n", eqseq );
// for( j=0; j<1; j++ ) reporterr( "a = %s\n", aseq[j] );
if( deletelist )
{
for( j=0; j<an; j++ )
{
ndel = 0;
for( i=0,q=0; i<len; i++ )
{
tmpc = aseq[j][i];
if( tmpc != '-' )
{
if( eqseq[i] == '=' )
{
// reporterr( "deleting %d-%d, %c\n", j, i, aseq[j][i] );
deletelist[j] = realloc( deletelist[j], sizeof( int ) * (ndel+2) );
deletelist[j][ndel] = q;
ndel++;
}
q++;
}
}
deletelist[j][ndel] = -1;
}
}
for( i=0,p=0; i<len; i++ )
{
// if( oseq[0][i] != '=' )
// reporterr( "i=%d, p=%d, q=%d, originally, %c\n", i, p, q, originallygapped[p]);
// if( eqseq[i] != '=' && originallygapped[p] != '-' ) // dame!!
if( eqseq[i] != '=' )
{
// reporterr( "COPY! p=%d\n", p );
if( p != i )
{
for( j=0; j<on; j++ ) oseq[j][p] = oseq[j][i];
for( j=0; j<an; j++ ) aseq[j][p] = aseq[j][i];
}
p++;
}
}
// reporterr( "deletemap = %s\n", deletemap );
// reporterr( "eqseq = %s\n", eqseq );
// reporterr( "originallygapped = %s\n", originallygapped );
for( j=0; j<on; j++ ) oseq[j][p] = 0;
for( j=0; j<an; j++ ) aseq[j][p] = 0;
free( eqseq );
// for( j=0; j<on; j++ ) reporterr( "\no=%s\n", oseq[j] );
// for( j=0; j<an; j++ ) reporterr( "a=%s\n", aseq[j] );
return( i-p );
}
#if 0
int maskoriginalgaps( char *repseq, char *originallygapped )
{
int i, p;
int len = strlen( repseq );
// reporterr( "repseq = %s\n", repseq );
for( i=0,p=0; i<len; i++ )
{
if( repseq[i] == '=' )
{
if( originallygapped[p] == '-' )
{
repseq[i] = '-';
p++;
}
}
else
{
p++;
}
}
reporterr( "repseq = %s\n", repseq );
exit( 1 );
}
void restoregaponlysites( char *originallygapped, int n1, int n2, char **s1, char **s2, int rep )
{
int i, j, p;
char *tmpnew;
int len;
reporterr( "originallygapped = %s\n", originallygapped );
reporterr( "s1[0] = %s\n", s1[0] );
reporterr( "s1[rep] = %s\n", s1[rep] );
reporterr( "s2[0] = %s\n", s2[0] );
exit( 1 );
tmpnew = calloc( strlen( originallygapped )+1, sizeof( char ) );
len = strlen( s1[0] );
for( i=0,p=0; i<len; i++ )
{
reporterr( "i=%d, p=%d, s[]=%c, o[]=%c\n", i, p, s1[0][i], originallygapped[p] );
if( originallygapped[p] == 'o' )
{
tmpnew[p] = s1[0][i];
p++;
}
while( originallygapped[p] == '-' )
{
tmpnew[p] = '-';
p++;
}
}
reporterr( "s1[0] = %s\n", s1[0] );
reporterr( "tmpnew = %s\n", tmpnew );
}
#endif
int recordoriginalgaps( char *originallygapped, int n, char **s )
{
int i, j;
int len = strlen( s[0] );
int v = 0;
for( i=0; i<len; i++ )
{
for( j=0; j<n; j++ ) if( s[j][i] != '-' ) break;
if( j == n )
originallygapped[i] = '-';
else
originallygapped[i] = 'o';
}
originallygapped[i] = 0;
return( v );
}
void restoreoriginalgaps( int n, char **seq, char *originalgaps )
{
int i, j, p;
int lenf = strlen( originalgaps );
char *tmpseq = calloc( lenf+1, sizeof( char ) );
for( i=0; i<n; i++ )
{
for( j=0,p=0; j<lenf; j++ )
{
if( originalgaps[j] == '-' )
tmpseq[j] = '-';
else
tmpseq[j] = seq[i][p++];
}
strcpy( seq[i], tmpseq );
}
free( tmpseq );
}
void reconstructdeletemap( int nadd, char **addbk, int **deletelist, char **realn, FILE *fp, char **name )
{
int i, j, p, len;
char *gapped, *nameptr, *tmpptr;
for( i=0; i<nadd; i++ )
{
len = strlen( addbk[i] );
gapped = calloc( len+1, sizeof( char ) );
// for( j=0; j<len; j++ ) gapped[j] = 'o'; // iranai
// gapped[len] = 0; // iranai
nameptr = name[i] + 1;
if( outnumber )
nameptr = strstr( nameptr, "_numo_e" ) + 8;
if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // = -> _ no tame
fprintf( fp, ">%s\n", nameptr );
fprintf( fp, "# letter, position in the original sequence, position in the reference alignment\n" );
// reporterr( "addbk[%d] = %s\n", i, addbk[i] );
for( j=0; (p=deletelist[i][j])!=-1; j++ )
{
// reporterr( "deleting %d, %c\n", p, addbk[i][p] );
gapped[p] = '-';
}
// reporterr( "addbk = %s\n", addbk[i] );
// reporterr( "gapped = %s\n", gapped );
for( j=0,p=0; j<len; j++ )
{
while( realn[i][p] == '-' )
p++;
if( gapped[j] == '-' )
{
fprintf( fp, "%c, %d, -\n", addbk[i][j], j+1 ); // 1origin
}
else
{
fprintf( fp, "%c, %d, %d\n", addbk[i][j], j+1, p+1 ); // 1origin
p++;
}
}
free( gapped );
}
}