00001 #ifndef BIOSEQIO_HEADER
00002 #define BIOSEQIO_HEADER
00003
00004
00005
00011
00012
00013
00014
00015
00016
00017
00018
00019 #include <stdlib.h>
00020
00021 #ifdef _STANDARD_C_PLUS_PLUS
00022 #include <iostream>
00023 #else
00024 #include <iostream.h>
00025 #endif
00026
00027 #include <string>
00028 using namespace std;
00029 #ifndef NPOS
00030 #define NPOS string::npos
00031 #endif
00032
00033
00034
00035 #include"Monomer.hh"
00036 #include "Rangedimexc.hh"
00037
00038
00039
00040 namespace RazorBack {
00041
00046 class Bioseqio_
00047 {
00048
00049 private:
00050 char *Buffer;
00051 static const int BUFFERSIZE;
00052
00053
00054 protected:
00055
00060 Bioseqio_(): Buffer(NULL) {}
00061
00063 Bioseqio_(const Bioseqio_& B):Buffer(NULL) {}
00064
00069 Bioseqio_& operator=(const Bioseqio_& B) { return *this; }
00070
00072 virtual ~Bioseqio_() { delete [] Buffer; }
00073
00074 char* buffer() { return Buffer; }
00075 char& buffer(size_t Idx) { return Buffer[Idx]; }
00076
00078
00079 public:
00080
00082 enum Errtype_ {OK=0, FILEERR, FORMATERR, TITLEERR, IDERR, ANNOTERR, SEQERR, TYPEERR};
00084 enum Formatflags_ {UNKNOWN=0, FASTA=1, GCG=2, PIR=4, SWISS=8, EMBL=16, ANYFORMAT=31};
00085
00090 static const char *error_str(Errtype_ Err);
00091 static const char *format_str(Formatflags_ F);
00092
00094 protected:
00095
00096 void make_buffer() { if (Buffer==NULL) Buffer=new char [BUFFERSIZE]; }
00097 bool fetch_buffer(istream& Inf);
00098 int clean_seq(string& S, int Type, bool Nocase, bool GapsOK=false) const;
00099 static int autodetect_type(const string& S);
00100
00101 Errtype_ read_fasta(istream& Inf, string& Id,
00102 string& Title, string& Seq);
00103 Errtype_ read_pir(istream& Inf, string& Id,
00104 string& Title, string& Annot, string& Seq);
00105 Errtype_ read_gcg(istream& Inf, string& Id,
00106 string& Annot, string& Seq,
00107 int &Gcglen, int &Gcgcheck, char &Typechar);
00108 Errtype_ read_swiss(istream& Inf, string& Id,
00109 string& Annot, string& Seq,
00110 int& Swlen, int& Mw, unsigned int& Crc32);
00111 Errtype_ read_embl(istream& Inf, string& Id,
00112 string& Annot, string& Seq,
00113 int& Embllen, int Acgtx[5]);
00114
00115 static Errtype_ write_fasta(ostream& Outf, const string& ID,
00116 const string& Title, const string& Seq);
00117 static Errtype_ write_pir(ostream& Outf, const string& ID,
00118 const string& Title, const string& Annot, const string& Seq);
00119 static Errtype_ write_gcg(ostream& Outf, const string& ID,
00120 const string& Title, const string& Annot, const string& Seq,
00121 Monomer_::Type_ Type);
00122 static Errtype_ write_swiss(ostream& Outf, const string& ID,
00123 const string& Title, const string& Annot, const string& Seq);
00124 static Errtype_ write_embl(ostream& Outf, const string& ID,
00125 const string& Title, const string& Annot, const string& Seq,
00126 Monomer_::Type_ Type);
00127
00128 static int gcg_checksum(const string& S);
00129 static const char *current_datestr();
00130 static int mol_weight(const string& S);
00131 static void BuildCRCTable(unsigned int CRCTable[256]);
00132 static unsigned int crc_32 (const string& buffer);
00133 static void count_bases(const string& S, int Acgtx[5], bool Isdna);
00134 static bool compare_basecount(const string& S, const int Acgtx[5], bool Isdna);
00135
00136 private:
00137
00138 static void parse_title(const char *Strc, string& Tok1, string& Rest);
00139 static void swissembl_comment(ostream& Outf, const string& S);
00140 };
00141
00142
00143 }
00144
00145
00146
00147 #endif // BIOSEQIO_HEADER