diff --git a/project/src/project/crawler/MovieCrawler.java b/project/src/project/crawler/MovieCrawler.java
new file mode 100644
index 0000000..9dda973
--- /dev/null
+++ b/project/src/project/crawler/MovieCrawler.java
@@ -0,0 +1,194 @@
+package project.crawler;
+
+import project.bean.Movie;
+import project.utils.DataCleaner;
+import project.utils.HttpUtils;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/** Crawls the Douban Top 250 listing pages and turns each list entry into a {@link Movie}. */
+public class MovieCrawler {
+    private static final String LIST_URL = "https://movie.douban.com/top250?start=";
+    private static final String ITEM_OPEN = "<div class=\"item\">";
+    private static final String DIV_OPEN = "<div";
+    private static final String DIV_CLOSE = "</div>";
+    private static final String DIRECTOR_LABEL = "\u5bfc\u6f14:"; // Chinese for "director:"
+    private static final Pattern LINE_BREAK = Pattern.compile("<br\\s*/?>", Pattern.CASE_INSENSITIVE);
+    private static final Pattern FOUR_DIGITS = Pattern.compile("\\d{4}");
+    private static final Pattern HTML_TAG = Pattern.compile("<[^>]*>");
+
+    /**
+     * Downloads the first {@code pageCount} listing pages and parses every entry on them.
+     * A page that fails to download or parse is reported on stdout and skipped.
+     *
+     * @param pageCount number of pages to fetch; a value below 1 yields an empty list
+     * @return the movies parsed from all fetched pages, in page order
+     * @throws Exception if the thread is interrupted while pausing between requests
+     */
+    public static List<Movie> crawlMovies(int pageCount) throws Exception {
+        List<Movie> movies = new ArrayList<>();
+        for (int page = 1; page <= pageCount; page++) {
+            String url = LIST_URL + (page - 1) * 25; // the start offset advances by 25 per page
+            System.out.println("Crawling page " + page + " from " + url);
+            try {
+                String html = HttpUtils.getHtml(url);
+                System.out.println("Got HTML content, length: " + html.length());
+                // Print the first 500 characters to show what markup was actually received.
+                if (html.length() > 500) {
+                    System.out.println("HTML preview: " + html.substring(0, 500) + "...");
+                }
+                List<Movie> pageMovies = parseMovies(html);
+                System.out.println("Parsed " + pageMovies.size() + " movies from page " + page);
+                movies.addAll(pageMovies);
+            } catch (Exception e) {
+                // Best effort: skip the page. Print e itself because getMessage() can be null.
+                System.out.println("Error crawling page " + page + ": " + e);
+            }
+            if (page < pageCount) {
+                Thread.sleep(1000); // throttle requests; no pause is needed after the last page
+            }
+        }
+        System.out.println("Total movies crawled: " + movies.size());
+        return movies;
+    }
+
+    /**
+     * Splits a listing page into its {@code <div class="item">} fragments and parses each one.
+     *
+     * @param html HTML of one listing page
+     * @return the entries that yielded both a title and a positive rating
+     */
+    private static List<Movie> parseMovies(String html) {
+        List<Movie> movies = new ArrayList<>();
+        int count = 0;
+        int itemStart = html.indexOf(ITEM_OPEN);
+        while (itemStart >= 0) {
+            int itemEnd = findMatchingDivEnd(html, itemStart + ITEM_OPEN.length());
+            if (itemEnd < 0) {
+                break; // unbalanced markup: the rest of the page cannot be split reliably
+            }
+            count++;
+            Movie movie = parseMovie(html.substring(itemStart, itemEnd));
+            if (movie != null) {
+                movies.add(movie);
+            }
+            itemStart = html.indexOf(ITEM_OPEN, itemEnd);
+        }
+        System.out.println("Found " + count + " movie items, parsed " + movies.size() + " valid movies");
+        return movies;
+    }
+
+    /**
+     * Finds the end of a div by counting nested opening and closing div tags from {@code from}.
+     *
+     * @param html text to scan
+     * @param from index just past the opening tag of the div to close
+     * @return index just past the matching {@code </div>}, or -1 if there is none
+     */
+    private static int findMatchingDivEnd(String html, int from) {
+        int depth = 1;
+        int pos = from;
+        while (depth > 0) {
+            int nextClose = html.indexOf(DIV_CLOSE, pos);
+            if (nextClose < 0) {
+                return -1;
+            }
+            int nextOpen = html.indexOf(DIV_OPEN, pos);
+            if (nextOpen >= 0 && nextOpen < nextClose) {
+                depth++; // a nested div opens before the next closing tag
+                pos = nextOpen + DIV_OPEN.length();
+            } else {
+                depth--;
+                pos = nextClose + DIV_CLOSE.length();
+            }
+        }
+        return pos;
+    }
+
+    /**
+     * Builds a {@link Movie} from the HTML of a single list entry.
+     *
+     * @param movieHtml one {@code <div class="item">} fragment
+     * @return the movie, or {@code null} when the title or a positive rating is missing
+     */
+    private static Movie parseMovie(String movieHtml) {
+        try {
+            String title = extractTitle(movieHtml);
+            double rating = extractRating(movieHtml);
+            if (title.isEmpty() || !(rating > 0)) {
+                return null;
+            }
+            String info = extractInfoSection(movieHtml);
+            return new Movie(title, rating, extractYear(info), extractDirector(info));
+        } catch (Exception e) {
+            // Skip the entry, but say why instead of hiding the failure.
+            System.out.println("Skipping movie item that could not be parsed: " + e);
+            return null;
+        }
+    }
+
+    /** Returns the value of the first double-quoted {@code alt} attribute, or "" if absent. */
+    private static String extractTitle(String movieHtml) {
+        int altIndex = movieHtml.indexOf("alt=");
+        int start = altIndex > 0 ? movieHtml.indexOf('"', altIndex) : -1;
+        int end = start > 0 ? movieHtml.indexOf('"', start + 1) : -1;
+        return end > 0 ? movieHtml.substring(start + 1, end).trim() : "";
+    }
+
+    /** Returns the number in the {@code rating_num} element, or 0.0 if missing or malformed. */
+    private static double extractRating(String movieHtml) {
+        int ratingIndex = movieHtml.indexOf("rating_num");
+        int start = ratingIndex > 0 ? movieHtml.indexOf('>', ratingIndex) : -1;
+        int end = start > 0 ? movieHtml.indexOf("</span>", start) : -1;
+        if (end < 0) {
+            return 0.0;
+        }
+        try {
+            return Double.parseDouble(movieHtml.substring(start + 1, end).trim());
+        } catch (NumberFormatException e) {
+            return 0.0;
+        }
+    }
+
+    /** Returns the first {@code <p>} or {@code <p class="">} block up to its end tag, or "". */
+    private static String extractInfoSection(String movieHtml) {
+        int plain = movieHtml.indexOf("<p>");
+        int emptyClass = movieHtml.indexOf("<p class=\"\">");
+        int start = (plain < 0 || (emptyClass >= 0 && emptyClass < plain)) ? emptyClass : plain;
+        int end = start > 0 ? movieHtml.indexOf("</p>", start) : -1;
+        return end > start ? movieHtml.substring(start, end) : "";
+    }
+
+    /** Returns the first four-digit number after the first line break, or 0 if there is none. */
+    private static int extractYear(String info) {
+        Matcher lineBreak = LINE_BREAK.matcher(info);
+        if (!lineBreak.find()) {
+            return 0;
+        }
+        Matcher year = FOUR_DIGITS.matcher(info);
+        return year.find(lineBreak.end()) ? Integer.parseInt(year.group()) : 0;
+    }
+
+    /** Returns the first space-delimited token of the director's name, or "Unknown". */
+    private static String extractDirector(String info) {
+        int label = info.indexOf(DIRECTOR_LABEL);
+        if (label < 0) {
+            return "Unknown";
+        }
+        int start = label + DIRECTOR_LABEL.length();
+        // The name normally ends at "&nbsp;"; without one it ends at the line break or block end.
+        int nbsp = info.indexOf("&nbsp;", start);
+        int end = nbsp >= 0 ? nbsp : info.length();
+        Matcher lineBreak = LINE_BREAK.matcher(info);
+        if (lineBreak.find(start) && lineBreak.start() < end) {
+            end = lineBreak.start();
+        }
+        String director = HTML_TAG.matcher(info.substring(start, end)).replaceAll("").trim();
+        int space = director.indexOf(' ');
+        String firstToken = space > 0 ? director.substring(0, space).trim() : director;
+        return firstToken.isEmpty() ? "Unknown" : firstToken;
+    }
+}
\ No newline at end of file