Skip to main content

Transcriptomics

Differential expression, GEO dataset retrieval, pathway analysis, and isoform quantification using direct tool calls and file upload/download.

Upload count matrix → DEG analysis → download results

Upload a raw count matrix CSV, run differential expression analysis, and download the ranked DEG table.

from smartsbio import SmartsBio

# Authenticate, then work inside the first workspace the account lists.
client = SmartsBio(api_key="sk_live_...")
workspaces = client.workspaces.list()
ws_id = workspaces[0].id

# Step 1 - push the local count matrix (genes x samples) into "input/".
uploaded = client.files.upload("counts.csv", workspace_id=ws_id, path="input/")
counts_key = uploaded["key"]
print(f"Uploaded: {counts_key}")

# Step 2 - DESeq2-style differential expression on the uploaded matrix.
# The parameters are collected first so the tool call itself stays short.
deseq_params = {
    "file_path": counts_key,
    "workspace_id": ws_id,
    "condition_col": "condition",
    "reference_level": "control",
    "treatment_level": "treated",
    "padj_threshold": 0.05,
    "log2fc_threshold": 1.0,
    "output_path": "results/",
}
deg = client.tools.run(tool_id="deseq2_analysis", input=deseq_params)
n_significant = deg["n_significant"]
print(f"DEGs found: {n_significant}")

# Step 3 - fetch the ranked DEG table the tool reported under "output_path".
results_key = deg["output_path"]
saved_to = client.files.download(results_key, workspace_id=ws_id, dest="./output/")
print(f"Results saved to {saved_to}")

Fetch a GEO dataset and run pathway enrichment

Download an RNA-seq dataset from GEO, then run KEGG pathway enrichment on the top DEGs reported in the fetch result (the first 200 entries of its `top_degs` field).

from smartsbio import SmartsBio

# Authenticate and pick the first workspace listed for this account.
client = SmartsBio(api_key="sk_live_...")
ws_id = client.workspaces.list()[0].id

# Step 1: fetch GEO dataset
geo = client.tools.run(
    tool_id="geo_fetch",
    input={
        "accession": "GSE183947",   # COVID-19 PBMC RNA-seq
        "workspace_id": ws_id,
        "output_path": "geo/",
    },
)
print(f"Downloaded {geo['n_samples']} samples, {geo['n_genes']} genes")

# Step 2: KEGG pathway enrichment on top DEGs
# Only the first 200 entries of the fetch result's "top_degs" field are used.
gene_list = geo.get("top_degs", [])[:200]
if not gene_list:
    # A missing or empty "top_degs" would otherwise be submitted as an empty
    # gene list; stop here with an explicit message instead.
    raise SystemExit(
        "geo_fetch returned no 'top_degs'; nothing to run KEGG enrichment on."
    )

# NOTE(review): unlike the geo_fetch call above, this input carries no
# "workspace_id" / "output_path", yet "output_path" is read from the result in
# step 3 - confirm against the kegg_enrichment tool schema.
enrichment = client.tools.run(
    tool_id="kegg_enrichment",
    input={"gene_list": gene_list, "species": "hsa", "pvalue_threshold": 0.05},
)

# Show the first five pathways in the order the tool returned them.
for p in enrichment["pathways"][:5]:
    print(f"  {p['name']}  p={p['pvalue']:.2e}  k={p['k']}/{p['n']}")

# Step 3: download enrichment results table
out = client.files.download(enrichment["output_path"], workspace_id=ws_id, dest="./")
print(f"Enrichment table saved to {out}")

Isoform quantification from a BAM file

Upload a sorted BAM file, run isoform quantification, download the TPM matrix, then stream the agent's interpretation.

from smartsbio import SmartsBio

# Authenticate, then work inside the first workspace the account lists.
client = SmartsBio(api_key="sk_live_...")
workspaces = client.workspaces.list()
ws_id = workspaces[0].id

# Step 1 - upload the sorted BAM and its index side by side under "bam/".
bam_upload = client.files.upload("sample.bam", workspace_id=ws_id, path="bam/")
index_upload = client.files.upload("sample.bam.bai", workspace_id=ws_id, path="bam/")

# Step 2 - isoform quantification; both uploads are referenced by their keys.
quant_input = {
    "bam": bam_upload["key"],
    "bai": index_upload["key"],
    "workspace_id": ws_id,
    "genome": "GRCh38",
    "gene_of_interest": "TP53",
    "output_path": "quant/",
}
quant = client.tools.run(tool_id="rna_isoform_quant", input=quant_input)
n_isoforms = quant["n_isoforms"]
print(f"Isoforms detected: {n_isoforms}")

# Step 3 - download the TPM matrix the tool reported under "tpm_path".
tpm_path = quant["tpm_path"]
saved_to = client.files.download(tpm_path, workspace_id=ws_id, dest="./quant/")
print(f"TPM matrix saved to {saved_to}")

# Step 4 - stream the agent's interpretation, echoing only "content" chunks
# as they arrive (no newline, flushed so the text appears incrementally).
question = f"Interpret the TP53 isoform TPM results at {tpm_path}. Which isoforms are dominant?"
stream = client.query.stream(prompt=question, workspace_id=ws_id)
for chunk in stream:
    if chunk.type != "content":
        continue
    print(chunk.content, end="", flush=True)