Clustal Omega 1.2.4
seq.h File Reference
#include "squid/squid.h"
#include "util.h"
Include dependency graph for seq.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  mseq_t
 structure for storing multiple sequences More...
 

Macros

#define SEQTYPE_UNKNOWN   kOtherSeq
 
#define SEQTYPE_DNA   kDNA
 
#define SEQTYPE_RNA   kRNA
 
#define SEQTYPE_PROTEIN   kAmino
 
#define AMINOACID_ANY   'X'
 
#define NUCLEOTIDE_ANY   'N'
 

Functions

void AliStat (mseq_t *prMSeq, bool bSampling, bool bReportAll)
 Stripped down version of squid's alistat.
 
void AddSeq (mseq_t **prMSeqDest_p, char *pcSeqName, char *pcSeqRes)
 Creates a new sequence entry and appends it to an existing mseq structure.
 
void SeqSwap (mseq_t *mseq, int i, int j)
 Swap two sequences in an mseq_t structure.
 
void DealignMSeq (mseq_t *mseq)
 Dealigns all sequences in mseq structure, updates the sequence length info and sets aligned to FALSE.
 
const char * SeqTypeToStr (int seqtype)
 convert int-encoded iSeqType to string
 
int ReadSequences (mseq_t *prMSeq_p, char *pcSeqFile, int iSeqType, int iSeqFmt, bool bIsProfile, bool bDealignInputSeqs, int iMaxNumSeq, int iMaxSeqLen, char *pcHMMBatch)
 reads sequences from file
 
void NewMSeq (mseq_t **mseq)
 allocate and initialise new mseq_t
 
void FreeMSeq (mseq_t **mseq)
 Frees an mseq_t and its members and zeros all members.
 
void CopyMSeq (mseq_t **prMSeqDest_p, mseq_t *prMSeqSrc)
 copies an mseq structure
 
void LogSqInfo (SQINFO *sqinfo)
 debug output of sqinfo struct
 
int FindSeqName (char *seqname, mseq_t *mseq)
 
int WriteAlignment (mseq_t *mseq, const char *aln_outfile, int msafile_format, int iWrap, bool bResno)
 Write alignment to file.
 
void DealignSeq (char *seq)
 Removes all gap-characters from a sequence.
 
void ShuffleMSeq (mseq_t *prMSeq)
 Shuffle mseq order.
 
void SortMSeqByLength (mseq_t *prMSeq, const char cOrder)
 Sort sequences by length.
 
void JoinMSeqs (mseq_t **prMSeqDest_p, mseq_t *prMSeqToAdd)
 Appends an mseq structure to an already existing one. filename will be left untouched.
 
bool SeqsAreAligned (mseq_t *prMSeq, bool bIsProfile, bool bDealignInputSeqs)
 Checks if sequences in given mseq structure are aligned. By definition this is only true, if sequences are of the same length and at least one gap was found.
 

Macro Definition Documentation

◆ AMINOACID_ANY

#define AMINOACID_ANY   'X'

◆ NUCLEOTIDE_ANY

#define NUCLEOTIDE_ANY   'N'

◆ SEQTYPE_DNA

#define SEQTYPE_DNA   kDNA

◆ SEQTYPE_PROTEIN

#define SEQTYPE_PROTEIN   kAmino

◆ SEQTYPE_RNA

#define SEQTYPE_RNA   kRNA

◆ SEQTYPE_UNKNOWN

#define SEQTYPE_UNKNOWN   kOtherSeq

Int-encoded sequence types. These are in sync with squid's seqtypes and are only used for convenience here.

Function Documentation

◆ AddSeq()

void AddSeq ( mseq_t ** prMSeqDest_p,
char * pcSeqName,
char * pcSeqRes )
extern

Creates a new sequence entry and appends it to an existing mseq structure.

Parameters
[out] prMSeqDest_p: Already existing and initialised mseq structure
[in] pcSeqName: sequence name of the sequence to add
[in] pcSeqRes: the actual sequence (residues) to add
Note
Don't forget to update the align and type flag if necessary!

FIXME allow adding of more features

◆ AliStat()

void AliStat ( mseq_t * prMSeq,
bool bSampling,
bool bReportAll )
extern

Stripped down version of squid's alistat.

Parameters
[in] prMSeq: The alignment to analyse
[in] bSampling: For many sequences: samples from pool
[in] bReportAll: Report identities for all sequence pairs

Don't have to worry about sequence case because our version of PairwiseIdentity is case insensitive

mseq to squid msa

FIXME code overlap with WriteAlignment. Make it a function and take code there (contains more comments) as template

◆ CopyMSeq()

void CopyMSeq ( mseq_t ** prMSeqDest_p,
mseq_t * prMSeqSrc )
extern

copies an mseq structure

Parameters
[out] prMSeqDest_p: Copy of mseq structure
[in] prMSeqSrc: Source mseq structure to copy
Note
caller has to free copy by calling FreeMSeq()

◆ DealignMSeq()

void DealignMSeq ( mseq_t * mseq)
extern

Dealigns all sequences in mseq structure, updates the sequence length info and sets aligned to FALSE.

Parameters
[out] mseq: The mseq structure to dealign

◆ DealignSeq()

void DealignSeq ( char * seq)
extern

Removes all gap-characters from a sequence.

Parameters
[out] seq: Sequence to dealign
Note
seq will not be reallocated

◆ FindSeqName()

int FindSeqName ( char * seqname,
mseq_t * mseq )
extern
Parameters
[in] seqname: The sequence name to search for
[in] mseq: The multiple sequence structure to search in
Returns
-1 on failure, sequence index of matching name otherwise
Warning
If sequence name happens to be used twice, only the first one will be reported back

◆ FreeMSeq()

void FreeMSeq ( mseq_t ** mseq)
extern

Frees an mseq_t and its members and zeros all members.

Parameters
[in] mseq: mseq_t to free
Note
use in conjunction with NewMSeq()
See also
NewMSeq

◆ JoinMSeqs()

void JoinMSeqs ( mseq_t ** prMSeqDest_p,
mseq_t * prMSeqToAdd )

Appends an mseq structure to an already existing one. filename will be left untouched.

Parameters
[in,out] prMSeqDest_p: MSeq structure to append to
[in] prMSeqToAdd: MSeq structure to be appended

◆ LogSqInfo()

void LogSqInfo ( SQINFO * sqinfo)
extern

debug output of sqinfo struct

Parameters
[in] sqinfo: Squid's SQINFO struct for a certain sequence
Note
useful for debugging only

◆ NewMSeq()

void NewMSeq ( mseq_t ** prMSeq)
extern

allocate and initialise new mseq_t

Parameters
[out] prMSeq: newly allocated and initialised mseq_t
Note
caller has to free by calling FreeMSeq()
See also
FreeMSeq

◆ ReadSequences()

int ReadSequences ( mseq_t * prMSeq,
char * seqfile,
int iSeqType,
int iSeqFmt,
bool bIsProfile,
bool bDealignInputSeqs,
int iMaxNumSeq,
int iMaxSeqLen,
char * pcHMMBatch )
extern

reads sequences from file

Parameters
[out] prMSeq: Multiple sequence struct. Must be preallocated. FIXME: would make more sense to allocate it here.
[in] seqfile: Sequence file name. If '-', sequences will be read from stdin.
[in] iSeqType: int-encoded sequence type. Set to SEQTYPE_UNKNOWN for autodetect (guessed from first sequence)
[in] iMaxNumSeq: Return an error if more than iMaxNumSeq sequences have been read
[in] iMaxSeqLen: Return an error if a sequence longer than iMaxSeqLen has been read
Returns
0 on success, -1 on error
Note
  • Depends heavily on squid
  • Sequence file format will be guessed
  • If supported by squid, gzipped files can be read as well.

◆ SeqsAreAligned()

bool SeqsAreAligned ( mseq_t * prMSeq,
bool bIsProfile,
bool bDealignInputSeqs )

Checks if sequences in given mseq structure are aligned. By definition this is only true, if sequences are of the same length and at least one gap was found.

Parameters
[in] prMSeq: Sequences to check
Returns
TRUE if sequences are aligned, FALSE if not

◆ SeqSwap()

void SeqSwap ( mseq_t * prMSeq,
int i,
int j )
extern

Swap two sequences in an mseq_t structure.

Parameters
[out] prMSeq: Multiple sequence struct
[in] i: Index of first sequence
[in] j: Index of second sequence

◆ SeqTypeToStr()

const char * SeqTypeToStr ( int iSeqType)
extern

convert int-encoded iSeqType to string

Parameters
[in] iSeqType: int-encoded sequence type
Returns
character pointer describing the sequence type

◆ ShuffleMSeq()

void ShuffleMSeq ( mseq_t * mseq)
extern

Shuffle mseq order.

Parameters
[out] mseq: mseq structure to shuffle

◆ SortMSeqByLength()

void SortMSeqByLength ( mseq_t * prMSeq,
const char cOrder )
extern

Sort sequences by length.

Parameters
[out] prMSeq: mseq to sort by length
[in] cOrder: Sorting order. 'd' for descending, 'a' for ascending.

◆ WriteAlignment()

int WriteAlignment ( mseq_t * mseq,
const char * pcAlnOutfile,
int outfmt,
int iWrap,
bool bResno )
extern

Write alignment to file.

Parameters
[in] mseq: The mseq_t struct containing the aligned sequences
[in] pcAlnOutfile: The name of the output file
[in] outfmt: The alignment output format (defined in squid.h)
[in] iWrap: length of line for Clustal/Fasta format
Returns
Non-zero on error
Note
We create a temporary squid MSA struct in here because we never use it within clustal. We might be better off using the old clustal output routines instead.