The sample database contains 28 sequences from the
GenBank flat file GBUNA.SEQ (Unannotated sequences). Figure 1 shows
the relationships between the tables. I shall explain these scripts
and the overall database design later. Let me say that this is only
one of many possible designs, and I make no claim that this is the
best design.
Figure 1. Relationship between tables in the sample
database.

The MySQL table-creation scripts
// Creates the database
CREATE DATABASE genephp;
USE genephp;
// Creates the SEQUENCE table
CREATE TABLE sequence (
PRIM_ACC varchar(8) NOT NULL default '0',
ENTRY_NAME varchar(8) NOT NULL default '',
SEQ_LENGTH int(11) default NULL,
MOL_TYPE varchar(6) default NULL,
DATE date default NULL,
SOURCE varchar(80) default NULL,
SEQUENCE mediumtext NOT NULL,
DESCRIPTION varchar(240) default NULL,
ORGANISM varchar(80) default NULL,
PRIMARY KEY (PRIM_ACC),
KEY LOCUS_NAME (ENTRY_NAME,MOL_TYPE)
) TYPE=MyISAM;
// Creates the ACCESSION table.
CREATE TABLE accession (
PRIM_ACC varchar(8) NOT NULL default '',
ACCESSION varchar(8) NOT NULL default '',
PRIMARY KEY (PRIM_ACC,ACCESSION),
UNIQUE KEY PRIM_ACC (PRIM_ACC,ACCESSION),
KEY PRIM_ACC_2 (PRIM_ACC,ACCESSION)
) TYPE=MyISAM;
// Creates the authors table
CREATE TABLE authors (
PRIM_ACC varchar(8) NOT NULL default '',
REFNO int(11) NOT NULL default '0',
AUTHOR varchar(50) NOT NULL default '',
KEY PRIM_ACC (PRIM_ACC,REFNO)
) TYPE=MyISAM;
// Creates the gbfeatures table
CREATE TABLE gbfeatures (
PRIM_ACC varchar(8) NOT NULL default '',
FT_KEY varchar(15) NOT NULL default '',
FT_QUAL varchar(60) NOT NULL default '',
FT_VALUE text,
PRIMARY KEY (PRIM_ACC,FT_KEY,FT_QUAL),
UNIQUE KEY PRIM_ACC (PRIM_ACC,FT_KEY,FT_QUAL),
KEY PRIM_ACC_2 (PRIM_ACC,FT_KEY,FT_QUAL)
) TYPE=MyISAM;
// Creates the GBSEQUENCE table
CREATE TABLE gbsequence (
PRIM_ACC varchar(8) NOT NULL default '',
STRANDS char(2) default NULL,
TOPOLOGY char(1) default NULL,
DIVISION char(3) default NULL,
SEGMENT_NO int(11) default NULL,
SEGMENT_COUNT int(11) default NULL,
VERSION varchar(10) default NULL,
NCBI_GI_ID varchar(30) default NULL,
PRIMARY KEY (PRIM_ACC),
UNIQUE KEY PRIM_ACC (PRIM_ACC),
KEY PRIM_ACC_2 (PRIM_ACC)
) TYPE=MyISAM;
// Creates the KEYWORDS table.
CREATE TABLE keywords (
PRIM_ACC varchar(8) NOT NULL default '',
KEYWORDS varchar(80) NOT NULL default '',
PRIMARY KEY (PRIM_ACC,KEYWORDS),
UNIQUE KEY PRIM_ACC (PRIM_ACC,KEYWORDS),
KEY PRIM_ACC_2 (PRIM_ACC,KEYWORDS)
) TYPE=MyISAM;
// Creates the REFERENCE table.
CREATE TABLE reference (
PRIM_ACC varchar(8) NOT NULL default '',
REFNO int(11) NOT NULL default '0',
BASE_RANGE varchar(80) default NULL,
TITLE varchar(255) default NULL,
MEDLINE varchar(8) default NULL,
PUBMED varchar(20) default NULL,
REMARK varchar(255) default NULL,
JOURNAL text,
PRIMARY KEY (PRIM_ACC,REFNO),
KEY PRIM_ACC (PRIM_ACC,REFNO)
) TYPE=MyISAM;
// Creates the SRCFORM table. SRCFORM is short for "source form". This table contains
// the sequence data in their original data format (e.g. GenBank, SwissProt, PDB, etc.).
CREATE TABLE srcform ( PRIM_ACC varchar(8) NOT NULL default '', ENTRY mediumtext, PRIMARY KEY (PRIM_ACC), UNIQUE KEY PRIM_ACC (PRIM_ACC), KEY PRIM_ACC_2 (PRIM_ACC) ) TYPE=MyISAM;
Back to Top. Back to Home Page.
|