You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
41 lines
1.1 KiB
41 lines
1.1 KiB
from dataclasses import dataclass, field
|
|
from datetime import datetime
|
|
from typing import List, Dict, Any
|
|
|
|
|
|
@dataclass
class ScrapedItem:
    """A single scraped record: a title, its content, and the URL it belongs to."""

    title: str
    content: str
    url: str
    # Filled in with the current local time when the caller does not supply one.
    timestamp: datetime = field(default_factory=datetime.now)

    def to_dict(self) -> Dict[str, Any]:
        """Return the item as a plain dict with the timestamp as an ISO-8601 string."""
        serialized: Dict[str, Any] = {
            name: getattr(self, name) for name in ('title', 'content', 'url')
        }
        # datetime objects are not JSON-friendly, so emit the ISO text form.
        serialized['timestamp'] = self.timestamp.isoformat()
        return serialized
|
|
|
|
|
|
@dataclass
class ScrapedData:
    """Container for the items collected from one source by one strategy.

    ``total_items`` mirrors ``len(items)``: it is refreshed by ``add_item``
    and, when items are passed to the constructor without an explicit
    count, initialised from them in ``__post_init__``.
    """

    source: str
    strategy_name: str
    # Quoted forward reference: the class body does not need ScrapedItem to
    # be resolvable at definition time.
    items: List["ScrapedItem"] = field(default_factory=list)
    # Filled in with the current local time when the caller does not supply one.
    scraped_at: datetime = field(default_factory=datetime.now)
    total_items: int = 0

    def __post_init__(self) -> None:
        """Sync ``total_items`` with items supplied at construction time."""
        # Only fill in the count when it was left at its default; an
        # explicitly passed non-zero value is kept as the caller gave it.
        if self.items and self.total_items == 0:
            self.total_items = len(self.items)

    def add_item(self, item: "ScrapedItem") -> None:
        """Append ``item`` to ``items`` and refresh ``total_items``."""
        self.items.append(item)
        self.total_items = len(self.items)

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict form of this batch.

        Each item is converted through its own ``to_dict()`` and
        ``scraped_at`` is rendered as an ISO-8601 string.
        """
        return {
            'source': self.source,
            'strategy_name': self.strategy_name,
            'items': [item.to_dict() for item in self.items],
            'scraped_at': self.scraped_at.isoformat(),
            'total_items': self.total_items,
        }
|
|
|