AI Web Feeds
Guides

Database Quick Start

Get started with the AI Web Feeds database in minutes

Database Quick Start

Get up and running with the AI Web Feeds database system quickly.

First-Time Setup

1. Initialize Alembic (One-Time)

cd packages/ai_web_feeds
uv run alembic init alembic

2. Create Initial Migration

uv run alembic revision --autogenerate -m "initial_schema"
uv run alembic upgrade head

3. Load Data from YAML Files

from ai_web_feeds.data_sync import DataSyncOrchestrator
from ai_web_feeds import DatabaseManager

db = DatabaseManager("sqlite:///../../data/aiwebfeeds.db")
sync = DataSyncOrchestrator(db)

# Load feeds.yaml and topics.yaml into database
results = sync.full_sync()
print(f"Loaded {results['feeds_loaded']} feeds and {results['topics_loaded']} topics")

Common Usage Patterns

Core Analytics

from ai_web_feeds import DatabaseManager
from ai_web_feeds.analytics import FeedAnalytics

db = DatabaseManager("sqlite:///../../data/aiwebfeeds.db")

with db.get_session() as session:
    analytics = FeedAnalytics(session)

    # Overview statistics
    stats = analytics.get_overview_stats()
    print(f"Total feeds: {stats['total_feeds']}")

    # Quality metrics
    quality = analytics.get_quality_metrics()
    print(f"Average quality score: {quality['avg_quality_score']:.2f}")

    # Health report
    health = analytics.generate_health_report()
    print(f"Healthy feeds: {health['overall_stats']['healthy_count']}")

Advanced Analytics

from ai_web_feeds.analytics.advanced import AdvancedFeedAnalytics

with db.get_session() as session:
    analytics = AdvancedFeedAnalytics(session)

    # Predict feed health
    prediction = analytics.predict_feed_health("feed_id_123", days_ahead=7)
    print(f"Predicted health: {prediction['predicted_health']:.2f}")

    # Cluster similar feeds
    clusters = analytics.cluster_feeds_by_similarity(similarity_threshold=0.6)
    print(f"Found {len(clusters)} clusters")

    # Generate ML insights
    insights = analytics.generate_ml_insights_report()
    print(f"Top pattern: {insights['patterns'][0]['pattern_type']}")

Data Synchronization

from ai_web_feeds.data_sync import DataSyncOrchestrator

sync = DataSyncOrchestrator(db)

# Full bidirectional sync
results = sync.full_sync()

# Export enriched data
export_results = sync.export_enriched_feeds("../../data/feeds.enriched.yaml")
print(f"Exported {export_results['feeds_exported']} feeds")

# Sync with progress callback
def on_progress(current, total, item_type):
    print(f"Progress: {current}/{total} {item_type}")

results = sync.full_sync(progress_callback=on_progress)

Working with Advanced Models

from ai_web_feeds.models_advanced import (
    FeedHealthMetric,
    DataQualityMetric,
    ContentEmbedding
)

with db.get_session() as session:
    # Record health metric
    health = FeedHealthMetric(
        feed_source_id="feed_123",
        overall_health_score=0.85,
        availability_score=0.95,
        freshness_score=0.80,
        content_quality_score=0.90
    )
    session.add(health)

    # Store content embedding
    embedding = ContentEmbedding(
        feed_item_id="item_456",
        embedding_vector=[0.1, 0.2, 0.3],  # Truncated placeholder; store the full vector returned by the model (length must match `dimension`)
        model_name="text-embedding-ada-002",
        dimension=1536
    )
    session.add(embedding)

    session.commit()

Usage Examples from Python

Basic Analytics

from ai_web_feeds import DatabaseManager
from ai_web_feeds.analytics import FeedAnalytics

# Initialize
db = DatabaseManager("sqlite:///data/aiwebfeeds.db")
db.create_db_and_tables()

# Run analytics
with db.get_session() as session:
    analytics = FeedAnalytics(session)

    # Overview stats
    stats = analytics.get_overview_stats()

    # Quality metrics
    quality = analytics.get_quality_metrics()

    # Feed health
    health = analytics.get_feed_health_report("feed_xyz")

    # Full report
    report = analytics.generate_full_report()

Advanced Analytics

from ai_web_feeds.analytics.advanced import AdvancedFeedAnalytics

with db.get_session() as session:
    analytics = AdvancedFeedAnalytics(session)

    # Predict feed health 7 days ahead
    prediction = analytics.predict_feed_health("feed_xyz", days_ahead=7)

    # Detect content patterns
    patterns = analytics.detect_content_patterns("feed_xyz")

    # Find similar feeds
    similarity = analytics.compute_feed_similarity("feed_1", "feed_2")

    # Cluster feeds
    clusters = analytics.cluster_feeds_by_similarity(similarity_threshold=0.6)

    # ML insights report
    insights = analytics.generate_ml_insights_report()

Data Synchronization

from pathlib import Path

from ai_web_feeds.data_sync import DataSyncOrchestrator, SyncConfig

# Configure sync
config = SyncConfig(
    feeds_yaml_path=Path("data/feeds.yaml"),
    topics_yaml_path=Path("data/topics.yaml"),
    batch_size=100,
    update_existing=True,
)

# Initialize sync
sync = DataSyncOrchestrator(db, config)

# Full bidirectional sync
results = sync.full_sync()

print(f"Topics synced: {results['topics']}")
print(f"Feeds synced: {results['feeds']}")
print(f"Export complete: {results['export']}")

Load Feeds from YAML

from ai_web_feeds.data_sync import FeedDataLoader

loader = FeedDataLoader(db)

# With progress callback
def progress(current, total):
    print(f"Loading feeds: {current}/{total}")

stats = loader.load_feeds_from_yaml(progress_callback=progress)
print(f"Inserted: {stats['inserted']}, Updated: {stats['updated']}")

Export Enriched Data

from ai_web_feeds.data_sync import DataExporter

exporter = DataExporter(db)
output_path = exporter.export_enriched_feeds()
print(f"Exported to: {output_path}")

Database Management

Check Database Status

from ai_web_feeds import DatabaseManager

db = DatabaseManager("sqlite:///../../data/aiwebfeeds.db")

with db.get_session() as session:
    from ai_web_feeds.models import FeedSource
    feed_count = session.query(FeedSource).count()
    print(f"Database contains {feed_count} feeds")

Run Migrations

# Check current version
uv run alembic current

# Upgrade to latest
uv run alembic upgrade head

# Downgrade one version
uv run alembic downgrade -1

# Show migration history
uv run alembic history

Backup Database

# SQLite backup
cp data/aiwebfeeds.db data/aiwebfeeds.db.backup

# Or use SQLite backup command
sqlite3 data/aiwebfeeds.db ".backup data/aiwebfeeds.db.backup"

Migration Strategy

Initial Setup (First Time)

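# 1. Create tables
# NOTE: `alembic revision --autogenerate` diffs the models against the live database,
# so if the tables already exist when step 3 runs, the initial migration will be empty.
# Skip this step if you want the initial migration itself to create the schema.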
cd packages/ai_web_feeds
uv run python -c "from ai_web_feeds import DatabaseManager; db = DatabaseManager(); db.create_db_and_tables()"

# 2. Initialize Alembic
uv run alembic init alembic

# 3. Create initial migration
uv run alembic revision --autogenerate -m "initial_schema"

# 4. Apply migration
uv run alembic upgrade head

# 5. Load data
uv run python -c "from ai_web_feeds.data_sync import DataSyncOrchestrator; from ai_web_feeds import DatabaseManager; sync = DataSyncOrchestrator(DatabaseManager()); sync.full_sync()"

Ongoing Updates

# 1. Modify models in models.py or models_advanced.py

# 2. Generate migration
uv run alembic revision --autogenerate -m "add_new_field"

# 3. Review migration file in alembic/versions/

# 4. Apply migration
uv run alembic upgrade head

Testing

# Run all tests with coverage
cd tests
uv run pytest --cov=ai_web_feeds --cov-report=html

# Run specific test file
uv run pytest packages/ai_web_feeds/test_data_sync.py -v

# Run with markers
uv run pytest -m "not slow" -v

File Reference

| File | Purpose |
| --- | --- |
| models.py | Core database models (FeedSource, FeedItem, etc.) |
| models_advanced.py | Advanced models (health, quality, embeddings) |
| analytics/core.py | Core analytics functions |
| analytics/advanced.py | ML-powered analytics |
| data_sync.py | YAML ↔ Database synchronization |
| storage.py | Database connection management |

Version: 0.1.0 | Last Updated: October 15, 2025