You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

102 lines
3.6 KiB

import argparse
import sys
import os
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from controllers import ScraperController
from commands import ScrapeCommand, ListCommand
from views import ConsoleView
from exceptions import ScraperException
class CLIApplication:
    """Command-line front end that routes sub-commands to the scraper controller.

    Three sub-commands are registered: ``scrape``, ``list`` and ``info``.
    Results and errors are reported through a ``ConsoleView``.
    """

    # Single source of truth for the concrete scraper names: both the parser
    # choices and the expansion of ``all`` are derived from this tuple, so the
    # two can no longer drift apart.
    STRATEGIES = ('news_scraper', 'books_scraper', 'tech_news_scraper')
    # Pseudo-strategy accepted by ``scrape`` meaning "run every scraper".
    ALL_STRATEGIES = 'all'

    def __init__(self) -> None:
        """Create the controller and the console view used by all handlers."""
        self.controller = ScraperController()
        self.view = ConsoleView()

    def run(self, args=None) -> None:
        """Parse ``args`` and execute the selected sub-command.

        Args:
            args: Argument list to parse. ``None`` is forwarded to
                ``ArgumentParser.parse_args`` unchanged.

        Raises:
            SystemExit: With status 1 when the handler raises any
                ``Exception`` (the error is shown through the view first).

        When no sub-command was selected the parser's help text is printed
        and the method returns normally.
        """
        parser = self._create_parser()
        parsed_args = parser.parse_args(args)

        # ``func`` is only present when a sub-command (which registers it via
        # ``set_defaults``) was chosen on the command line.
        handler = getattr(parsed_args, 'func', None)
        if handler is None:
            parser.print_help()
            return

        try:
            handler(parsed_args)
        except ScraperException as e:
            self.view.display_error(str(e))
            # Read the wrapped cause defensively so that reporting an error
            # can never itself fail with AttributeError.
            original = getattr(e, 'original_exception', None)
            if original:
                self.view.display_error(f"Original error: {original}")
            sys.exit(1)
        except Exception as e:
            # Top-level boundary: report anything unexpected and exit non-zero.
            self.view.display_error(f"Unexpected error: {e}")
            sys.exit(1)

    def _create_parser(self) -> argparse.ArgumentParser:
        """Build the argument parser with the scrape/list/info sub-commands.

        Each sub-parser stores its handler method under ``func`` so that
        ``run`` can dispatch without inspecting the command name.
        """
        parser = argparse.ArgumentParser(
            description='Web Scraper CLI - MVC + Command Pattern + Strategy Pattern',
            formatter_class=argparse.RawDescriptionHelpFormatter,
        )
        subparsers = parser.add_subparsers(dest='command', help='Available commands')

        scrape_parser = subparsers.add_parser('scrape', help='Scrape data from a website')
        scrape_parser.add_argument(
            'strategy',
            choices=[*self.STRATEGIES, self.ALL_STRATEGIES],
            help='Scraper strategy to use',
        )
        scrape_parser.add_argument(
            '--output', '-o',
            default='data',
            help='Output directory for scraped data',
        )
        scrape_parser.set_defaults(func=self._handle_scrape)

        list_parser = subparsers.add_parser('list', help='List all available scrapers')
        list_parser.set_defaults(func=self._handle_list)

        info_parser = subparsers.add_parser('info', help='Show detailed info about a scraper')
        info_parser.add_argument('strategy', help='Strategy name')
        info_parser.set_defaults(func=self._handle_info)

        return parser

    def _handle_scrape(self, args) -> None:
        """Run one scraper, or every scraper in ``STRATEGIES`` for ``all``.

        Args:
            args: Parsed namespace providing ``strategy`` and ``output``.

        Scrapers run sequentially in declaration order; an exception from one
        of them propagates and the remaining ones are not run.
        """
        if args.strategy == self.ALL_STRATEGIES:
            targets = self.STRATEGIES
        else:
            targets = (args.strategy,)
        for strategy_name in targets:
            self._scrape_single(strategy_name, args.output)

    def _scrape_single(self, strategy_name: str, output_dir: str) -> None:
        """Execute a ``ScrapeCommand`` for one strategy and display the result.

        Args:
            strategy_name: Name of the scraper strategy to run.
            output_dir: Directory assigned to ``controller.output_dir``
                before the command executes.
        """
        self.controller.output_dir = output_dir
        command = ScrapeCommand(self.controller, strategy_name)
        data, saved_path = command.execute()
        self.view.display_success(f"Scraped {data.total_items} items using {strategy_name}")
        self.view.display_scraped_data(data, saved_path)

    def _handle_list(self, args) -> None:
        """Display the strategies returned by a ``ListCommand``.

        Args:
            args: Parsed namespace (unused; present for the handler signature).
        """
        command = ListCommand(self.controller)
        strategies = command.execute()
        self.view.display_strategies(strategies)

    def _handle_info(self, args) -> None:
        """Display name and source for the strategy named by ``args.strategy``.

        Args:
            args: Parsed namespace providing ``strategy``.

        The first entry of ``controller.list_strategies()`` whose ``'name'``
        matches is shown; if none matches an error message is displayed.
        """
        strategies = self.controller.list_strategies()
        strategy = next((s for s in strategies if s['name'] == args.strategy), None)
        if strategy:
            self.view.display_message(f"\n=== {strategy['name']} ===")
            self.view.display_message(f"Source: {strategy['source']}")
        else:
            # NOTE(review): the lookup failure is reported but the process
            # still exits with status 0 - confirm whether that is intended.
            self.view.display_error(f"Strategy '{args.strategy}' not found")
def main():
    """Entry point: build a ``CLIApplication`` and run it with default arguments."""
    CLIApplication().run()
# Start the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()