You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
102 lines
3.6 KiB
102 lines
3.6 KiB
import argparse
|
|
import sys
|
|
import os
|
|
|
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
|
|
from controllers import ScraperController
|
|
from commands import ScrapeCommand, ListCommand
|
|
from views import ConsoleView
|
|
from exceptions import ScraperException
|
|
|
|
|
|
class CLIApplication:
    """Command-line front end for the scraper.

    Builds an argparse parser with the ``scrape``, ``list`` and ``info``
    sub-commands and routes each one to a handler method that talks to a
    ``ScraperController`` and reports through a ``ConsoleView``.
    """

    # Single source of truth for the concrete strategy names: used both as the
    # accepted values of ``scrape <strategy>`` and as the expansion of ``all``,
    # so the two can never drift apart.
    STRATEGIES = ('news_scraper', 'books_scraper', 'tech_news_scraper')

    def __init__(self):
        """Create the controller and the console view shared by all handlers."""
        self.controller = ScraperController()
        self.view = ConsoleView()

    def run(self, args=None):
        """Parse ``args`` and execute the selected sub-command.

        Args:
            args: Argument list handed to ``ArgumentParser.parse_args``;
                ``None`` lets argparse fall back to the process arguments.

        Raises:
            SystemExit: with code 1 when the handler raises any exception
                (the error is shown through the view first).
        """
        parser = self._create_parser()
        parsed_args = parser.parse_args(args)

        # ``func`` is only present when a sub-command was given (it is set via
        # ``set_defaults`` on each sub-parser); without one, just show usage.
        handler = getattr(parsed_args, 'func', None)
        if handler is None:
            parser.print_help()
            return

        try:
            handler(parsed_args)
        except ScraperException as e:
            self.view.display_error(str(e))
            # Guarded lookup: an exception instance lacking the attribute must
            # not raise AttributeError here and hide the real failure.
            original = getattr(e, 'original_exception', None)
            if original:
                self.view.display_error(f"Original error: {original}")
            sys.exit(1)
        except Exception as e:
            # Top-level boundary: report anything unexpected and fail the process.
            self.view.display_error(f"Unexpected error: {e}")
            sys.exit(1)

    def _create_parser(self) -> argparse.ArgumentParser:
        """Build the argument parser with its three sub-commands.

        Returns:
            A parser whose sub-parsers each store their handler method under
            the ``func`` attribute of the parsed namespace.
        """
        parser = argparse.ArgumentParser(
            description='Web Scraper CLI - MVC + Command Pattern + Strategy Pattern',
            formatter_class=argparse.RawDescriptionHelpFormatter,
        )
        subparsers = parser.add_subparsers(dest='command', help='Available commands')

        # scrape <strategy> [--output DIR]
        scrape_parser = subparsers.add_parser('scrape', help='Scrape data from a website')
        scrape_parser.add_argument(
            'strategy',
            choices=[*self.STRATEGIES, 'all'],
            help='Scraper strategy to use',
        )
        scrape_parser.add_argument(
            '--output', '-o',
            default='data',
            help='Output directory for scraped data',
        )
        scrape_parser.set_defaults(func=self._handle_scrape)

        # list
        list_parser = subparsers.add_parser('list', help='List all available scrapers')
        list_parser.set_defaults(func=self._handle_list)

        # info <strategy>
        info_parser = subparsers.add_parser('info', help='Show detailed info about a scraper')
        info_parser.add_argument('strategy', help='Strategy name')
        info_parser.set_defaults(func=self._handle_info)

        return parser

    def _handle_scrape(self, args):
        """Run one strategy, or every entry of ``STRATEGIES`` in order for ``all``.

        Args:
            args: Parsed namespace providing ``strategy`` and ``output``.
        """
        if args.strategy == 'all':
            selected = self.STRATEGIES
        else:
            selected = (args.strategy,)
        for strategy in selected:
            self._scrape_single(strategy, args.output)

    def _scrape_single(self, strategy_name: str, output_dir: str):
        """Execute a ``ScrapeCommand`` for one strategy and display the outcome.

        Args:
            strategy_name: Name passed to ``ScrapeCommand``.
            output_dir: Value assigned to the controller's ``output_dir``
                before the command runs.
        """
        self.controller.output_dir = output_dir
        command = ScrapeCommand(self.controller, strategy_name)
        data, saved_path = command.execute()
        self.view.display_success(f"Scraped {data.total_items} items using {strategy_name}")
        self.view.display_scraped_data(data, saved_path)

    def _handle_list(self, args):
        """Display the strategies returned by ``ListCommand``.

        Args:
            args: Parsed namespace (unused; present for the handler signature).
        """
        command = ListCommand(self.controller)
        strategies = command.execute()
        self.view.display_strategies(strategies)

    def _handle_info(self, args):
        """Show the name and source of the strategy named ``args.strategy``.

        Args:
            args: Parsed namespace providing ``strategy``.

        Raises:
            SystemExit: with code 1 when no listed strategy has that name, so
                the failure is visible to the calling shell, matching how
                ``run`` treats every other error.
        """
        strategies = self.controller.list_strategies()
        strategy = next((s for s in strategies if s['name'] == args.strategy), None)
        if not strategy:
            self.view.display_error(f"Strategy '{args.strategy}' not found")
            sys.exit(1)

        self.view.display_message(f"\n=== {strategy['name']} ===")
        self.view.display_message(f"Source: {strategy['source']}")
|
|
|
|
|
|
def main():
    """Script entry point: construct the CLI application and run it."""
    CLIApplication().run()
|
|
|
|
|
|
# Start the CLI only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|
|
|