GenbankAnnotationDb#

class GenbankAnnotationDb(*args, **kwargs)#

Support for annotations from Genbank files.

Attributes:
db
describe

top level description of the annotation db

table_names

Methods

add_feature(*, seqid, biotype, name, spans)

adds a record to user table

biotype_counts()

return counts of biological types across all tables and seqids

close()

closes the db

compatible(other_db[, symmetric])

checks whether table_names are compatible

count_distinct(*[, seqid, biotype, name])

return table of counts of distinct values

get_feature_children(name[, biotype, ...])

yields children of name

get_feature_parent(name[, exclude_biotype, ...])

yields parents of name

get_records_matching(*[, biotype, seqid, ...])

return all fields for matching records

make_indexes()

adds db indexes for core attributes

num_matches(*[, seqid, biotype, name, ...])

return the number of records matching condition

subset(*[, source, biotype, seqid, name, ...])

returns a new db instance with records matching the provided conditions

to_rich_dict()

returns a dict suitable for json serialisation

union(annot_db)

returns a new instance with records merged with those of annot_db

update(annot_db[, seqids])

update records with those from an instance of the same type

write(path)

writes db as bytes to path

StrOrBool

add_records

from_dict

get_features_matching

to_json

Notes

Extended attributes are stored as JSON in the attributes column of the gb table.

StrOrBool#

alias of str | bool

add_feature(*, seqid: str, biotype: str, name: str, spans: List[Tuple[int, int]], parent_id: str | None = None, strand: str | None = None, attributes: str | None = None, on_alignment: bool | None = False) None#

adds a record to user table

Parameters:
seqid : str

name of the sequence the feature resides on

biotype : str

biological type of the record

name : str

the name of a record, an identifier

spans : typing.List[typing.Tuple[int, int]], optional

this will be sorted

strand : str, optional

either '+' or '-'. Defaults to '+'.

attributes : str, optional

additional attributes as a string

on_alignment : bool, optional

whether the annotation is an alignment annotation

add_records(records, seqid)#
biotype_counts() dict#

return counts of biological types across all tables and seqids

close()#

closes the db

compatible(other_db: SupportsFeatures, symmetric=True) bool#

checks whether table_names are compatible

Parameters:
other_db

the other annotation db instance

symmetric

checks only that the tables of other_db are equal to, or a subset of, mine

count_distinct(*, seqid: StrOrBool = False, biotype: StrOrBool = False, name: StrOrBool = False) Optional[Table]#

return table of counts of distinct values

Parameters:
seqid, biotype, name

if a string, selects the subset of rows matching the provided values and counts distinct values for the other fields whose value is True.

Returns:
Table with columns corresponding to the arguments whose value was True

Examples

To compute copy number by gene name within each genome

>>> counts_table = db.count_distinct(seqid=True, biotype="gene", name=True)
property db: Connection#
property describe: Table#

top level description of the annotation db

classmethod from_dict(data: dict)#
get_feature_children(name: str, biotype: str | None = None, exclude_biotype: str | None = None, start: int | None = None, stop: int | None = None) Iterator[FeatureDataType]#

yields children of name

get_feature_parent(name: str, exclude_biotype: str | None = None, start: int | None = None, stop: int | None = None) Iterator[FeatureDataType]#

yields parents of name

get_features_matching(*, biotype: str = None, seqid: str = None, name: str = None, start: int = None, stop: int = None, strand: str | None = None, attributes: str | None = None, on_alignment: bool = None, allow_partial: bool = False) Iterator[FeatureDataType]#
get_records_matching(*, biotype: str = None, seqid: str = None, name: str = None, start: int = None, stop: int = None, strand: str | None = None, attributes: str | None = None, on_alignment: bool = None, allow_partial: bool = False) Iterator[dict]#

return all fields for matching records

make_indexes()#

adds db indexes for core attributes

num_matches(*, seqid: str | None = None, biotype: str | None = None, name: str | None = None, strand: str | None = None, attributes: str | None = None, on_alignment: bool | None = None) int#

return the number of records matching condition

subset(*, source: str | PathLike | PurePath = ':memory:', biotype: str = None, seqid: str = None, name: str = None, start: int | None = None, stop: int | None = None, strand: str | None = None, attributes: str | None = None, allow_partial: bool = False) Self#

returns a new db instance with records matching the provided conditions

property table_names: tuple[str]#
to_json() str#
to_rich_dict() dict#

returns a dict suitable for json serialisation

union(annot_db: SupportsFeatures) SupportsFeatures#

returns a new instance with records merged with those of annot_db

Parameters:
annot_db

an annotation db whose schema is either a subset or a superset of that of self

Returns:
The class whose schema contains the other
update(annot_db: SupportsFeatures, seqids: str | List[str] | None = None) None#

update records with those from an instance of the same type

write(path: str | PathLike | PurePath) None#

writes db as bytes to path