BasicAnnotationDb#

class BasicAnnotationDb(*args, **kwargs: dict[str, Any])#

Provides a user table for annotations. This can be merged with either the Gff or Genbank versions.

Attributes:
db
describe

top level description of the annotation db

table_names

Methods

add_feature(*, seqid, biotype, name, spans)

adds a record to user table

biotype_counts()

return counts of biological types across all tables and seqids

close()

closes the db

compatible(other_db[, symmetric])

checks whether table_names are compatible

count_distinct(*[, seqid, biotype, name])

return table of counts of distinct values

get_feature_children(name[, biotype])

yields children of name

get_feature_parent(name, **kwargs)

yields parents of name

get_records_matching(*[, biotype, seqid, ...])

return all fields for matching records

make_indexes()

adds db indexes for core attributes

num_matches(*[, seqid, biotype, name, ...])

return the number of records matching condition

subset(*[, source, biotype, seqid, name, ...])

returns a new db instance with records matching the provided conditions

to_rich_dict()

returns a dict suitable for json serialisation

union(annot_db)

returns a new instance whose records are merged with those of annot_db

update(annot_db[, seqids])

update records with those from an instance of the same type

write(path)

writes db as bytes to path

add_records

from_dict

get_features_matching

to_json

Notes

This is the default db on Sequence, SequenceCollection and Alignment

StrOrBool = str | bool#
add_feature(*, seqid: str, biotype: str, name: str, spans: list[tuple[int, int]], parent_id: str | None = None, strand: str | None = None, attributes: str | None = None, on_alignment: bool | None = False) None#

adds a record to user table

Parameters:
seqid : str

name of the sequence the feature resides on

biotype : str

biological type of the record

name : str

the name of a record, an identifier

spans : list[tuple[int, int]]

this will be sorted

strand : str, optional

either '+' or '-'. Defaults to '+'

attributes : str, optional

additional attributes as a string

on_alignment : bool, optional

whether the annotation is an alignment annotation

add_records(data: Iterable[dict] | None, **kwargs: dict[str, Any]) None#
biotype_counts() dict#

return counts of biological types across all tables and seqids

close() None#

closes the db

compatible(other_db: SupportsFeatures, symmetric=True) bool#

checks whether table_names are compatible

Parameters:
other_db

the other annotation db instance

symmetric

checks only that tables of other_db equal, or are a subset of, mine

count_distinct(*, seqid: StrOrBool = False, biotype: StrOrBool = False, name: StrOrBool = False) Table | None#

return table of counts of distinct values

Parameters:
seqid, biotype, name

if a string, selects the subset of rows matching the provided values and counts distinct values for the other fields whose value is True.

Returns:
Table with columns corresponding to the arguments whose value was True

Examples

To compute copy number by gene name within each genome

>>> counts_table = db.count_distinct(seqid=True, biotype="gene", name=True)
property db: Connection#
property describe: Table#

top level description of the annotation db

classmethod from_dict(data: dict) AnnotationDbABC#
get_feature_children(name: str, biotype: str | None = None, **kwargs: dict[str, Any]) Iterator[FeatureDataType]#

yields children of name

get_feature_parent(name: str, **kwargs: dict[str, Any]) Iterator[FeatureDataType]#

yields parents of name

get_features_matching(*, biotype: str | None = None, seqid: str | None = None, name: str | None = None, start: int | None = None, stop: int | None = None, strand: str | None = None, attributes: str | None = None, on_alignment: bool | None = None, allow_partial: bool = False) Iterator[FeatureDataType]#
get_records_matching(*, biotype: str | None = None, seqid: str | None = None, name: str | None = None, start: int | None = None, stop: int | None = None, strand: str | None = None, attributes: str | None = None, on_alignment: bool | None = None, allow_partial: bool = False) Iterator[dict]#

return all fields for matching records

make_indexes() None#

adds db indexes for core attributes

num_matches(*, seqid: str | None = None, biotype: str | None = None, name: str | None = None, strand: str | None = None, attributes: str | None = None, on_alignment: bool | None = None) int#

return the number of records matching condition

subset(*, source: str | PathLike | PurePath | Path = ':memory:', biotype: str | None = None, seqid: str | None = None, name: str | None = None, start: int | None = None, stop: int | None = None, strand: str | None = None, attributes: str | None = None, allow_partial: bool = False) Self#

returns a new db instance with records matching the provided conditions

property table_names: tuple[str]#
to_json() str#
to_rich_dict() dict#

returns a dict suitable for json serialisation

union(annot_db: SupportsFeatures) SupportsFeatures#

returns a new instance whose records are merged with those of annot_db

Parameters:
annot_db

an annotation db whose schema is either a subset or a superset of the schema of self

Returns:
The class whose schema contains the other
update(annot_db: SupportsFeatures, seqids: str | list[str] | None = None, **kwargs: dict[str, Any]) None#

update records with those from an instance of the same type

write(path: str | PathLike | PurePath | Path) None#

writes db as bytes to path