BioPHP: PHP for Biocomputing

Last updated: May 17, 2003 (Saturday)

BioPHP Sample Database top

The sample database contains 28 sequences from the GenBank flat file GBUNA.SEQ (Unannotated sequences). Figure 1 shows the relationships between the tables. I shall explain these scripts and the overall database design later. Let me say that this is only one of many possible designs, and I make no claim that this is the best design.

Figure 1. Relationship between tables in the sample database.

The MySQL table-creation scripts

// Creates the database
CREATE DATABASE genephp;
USE genephp;

// Creates the SEQUENCE table
CREATE TABLE sequence (
  PRIM_ACC varchar(8) NOT NULL default '0',
  ENTRY_NAME varchar(8) NOT NULL default '',
  SEQ_LENGTH int(11) default NULL,
  MOL_TYPE varchar(6) default NULL,
  DATE date default NULL,
  SOURCE varchar(80) default NULL,
  SEQUENCE mediumtext NOT NULL,
  DESCRIPTION varchar(240) default NULL,
  ORGANISM varchar(80) default NULL,
  PRIMARY KEY  (PRIM_ACC),
  KEY LOCUS_NAME (ENTRY_NAME,MOL_TYPE)
) TYPE=MyISAM;

// Creates the ACCESSION table.
CREATE TABLE accession (
  PRIM_ACC varchar(8) NOT NULL default '',
  ACCESSION varchar(8) NOT NULL default '',
  PRIMARY KEY  (PRIM_ACC,ACCESSION),
  UNIQUE KEY PRIM_ACC (PRIM_ACC,ACCESSION),
  KEY PRIM_ACC_2 (PRIM_ACC,ACCESSION)
) TYPE=MyISAM;

// Creates the authors table
CREATE TABLE authors (
  PRIM_ACC varchar(8) NOT NULL default '',
  REFNO int(11) NOT NULL default '0',
  AUTHOR varchar(50) NOT NULL default '',
  KEY PRIM_ACC (PRIM_ACC,REFNO)
) TYPE=MyISAM;

// Creates the gbfeatures table
CREATE TABLE gbfeatures (
  PRIM_ACC varchar(8) NOT NULL default '',
  FT_KEY varchar(15) NOT NULL default '',
  FT_QUAL varchar(60) NOT NULL default '',
  FT_VALUE text,
  PRIMARY KEY  (PRIM_ACC,FT_KEY,FT_QUAL),
  UNIQUE KEY PRIM_ACC (PRIM_ACC,FT_KEY,FT_QUAL),
  KEY PRIM_ACC_2 (PRIM_ACC,FT_KEY,FT_QUAL)
) TYPE=MyISAM;

// Creates the GBSEQUENCE table
CREATE TABLE gbsequence (
  PRIM_ACC varchar(8) NOT NULL default '',
  STRANDS char(2) default NULL,
  TOPOLOGY char(1) default NULL,
  DIVISION char(3) default NULL,
  SEGMENT_NO int(11) default NULL,
  SEGMENT_COUNT int(11) default NULL,
  VERSION varchar(10) default NULL,
  NCBI_GI_ID varchar(30) default NULL,
  PRIMARY KEY  (PRIM_ACC),
  UNIQUE KEY PRIM_ACC (PRIM_ACC),
  KEY PRIM_ACC_2 (PRIM_ACC)
) TYPE=MyISAM;

// Creates the KEYWORDS table.
CREATE TABLE keywords (
  PRIM_ACC varchar(8) NOT NULL default '',
  KEYWORDS varchar(80) NOT NULL default '',
  PRIMARY KEY  (PRIM_ACC,KEYWORDS),
  UNIQUE KEY PRIM_ACC (PRIM_ACC,KEYWORDS),
  KEY PRIM_ACC_2 (PRIM_ACC,KEYWORDS)
) TYPE=MyISAM;

// Creates the REFERENCE table.
CREATE TABLE reference (
  PRIM_ACC varchar(8) NOT NULL default '',
  REFNO int(11) NOT NULL default '0',
  BASE_RANGE varchar(80) default NULL,
  TITLE varchar(255) default NULL,
  MEDLINE varchar(8) default NULL,
  PUBMED varchar(20) default NULL,
  REMARK varchar(255) default NULL,
  JOURNAL text,
  PRIMARY KEY  (PRIM_ACC,REFNO),
  KEY PRIM_ACC (PRIM_ACC,REFNO)
) TYPE=MyISAM;

// Creates the SRCFORM table.  SRCFORM is short for "source form".  This table contains 
// the sequence data in their original data format (e.g. GenBank, SwissProt, PDB, etc.).
CREATE TABLE srcform (
PRIM_ACC varchar(8) NOT NULL default '',
ENTRY mediumtext,
PRIMARY KEY (PRIM_ACC),
UNIQUE KEY PRIM_ACC (PRIM_ACC),
KEY PRIM_ACC_2 (PRIM_ACC)
) TYPE=MyISAM; Back to Top.
Back to Home Page.

 


Copyright © 2003 by Sergio Gregorio, Jr.
All rights reserved.