{"id":29685,"date":"2023-08-05T16:53:59","date_gmt":"2024-01-28T19:50:38","guid":{"rendered":"https:\/\/www.silicloud.com\/zh\/blog\/scrapy%e7%94%a8%e4%ba%8e%e7%88%ac%e5%8f%96%e5%8a%a8%e6%bc%ab%e6%95%b0%e6%8d%ae%e2%91%a1%e3%80%82\/"},"modified":"2025-08-12T15:21:48","modified_gmt":"2025-08-12T07:21:48","slug":"scrapy%e7%94%a8%e4%ba%8e%e7%88%ac%e5%8f%96%e5%8a%a8%e6%bc%ab%e6%95%b0%e6%8d%ae%e2%91%a1%e3%80%82","status":"publish","type":"post","link":"https:\/\/www.silicloud.com\/zh\/blog\/scrapy%e7%94%a8%e4%ba%8e%e7%88%ac%e5%8f%96%e5%8a%a8%e6%bc%ab%e6%95%b0%e6%8d%ae%e2%91%a1%e3%80%82\/","title":{"rendered":"Scrapy\u7528\u4e8e\u722c\u53d6\u52a8\u6f2b\u6570\u636e\u2461"},"content":{"rendered":"<h1>\u5728\u8fd9\u7bc7\u6587\u7ae0\u4e2d\u6211\u4eec\u53ef\u4ee5\u4e86\u89e3\u5230\u4ec0\u4e48\uff1f\u30fb\u4f7f\u7528Scrapy\u521b\u5efa\u57fa\u672c\u722c\u866b\u7684\u65b9\u6cd5<br \/>\n\u30fbScrapy\u548cMongoDB\u7684\u8fde\u63a5\u65b9\u5f0f<\/p>\n<h1>\u8fd9\u662f\u4e00\u7bc7\u5173\u4e8e\u4ec0\u4e48\u7684\u6587\u7ae0\uff1f\u6211\u4f1a\u4ecb\u7ecd\u4e00\u4e0b\u722c\u866b\uff08Spider\uff09\u3002<br \/>\n\u8fd9\u662f\u4e0a\u4e00\u7bc7\u6587\u7ae0\u7684\u7eed\u7bc7\u3002\u5728\u4e0a\u4e00\u7bc7\u6587\u7ae0\u4e2d\uff0c\u6211\u4eec\u5b66\u4f1a\u4e86\u4f7f\u7528xpath\u8fdb\u884c\u7f51\u9875\u6293\u53d6\u3002\u800c\u8fd9\u6b21\uff0c\u6211\u4eec\u5c06\u4f7f\u722c\u866b\u80fd\u591f\u81ea\u52a8\u6293\u53d6\u591a\u4e2a\u7f51\u9875\u3002<br \/>\n\u4ee3\u7801\u5df2\u7ecf\u4e0a\u4f20\u5230GitHub\u4e0a\u4e86\u3002<\/p>\n<h1>\u8718\u86db\u7684\u6982\u8ff0<\/p>\n<div><img decoding=\"async\" class=\"post-images\" title=\"\" src=\"https:\/\/cdn.silicloud.com\/blog-img\/blog\/img\/657d038a37434c4406bd3df2\/5-0.png\" alt=\"image.png\" \/><\/p>\n<pre class=\"post-pre\"><code><span class=\"k\">def<\/span> <span class=\"nf\">parse<\/span><span class=\"p\">(<\/span><span class=\"bp\">self<\/span><span class=\"p\">,<\/span> <span class=\"n\">response<\/span><span class=\"p\">):<\/span>\r\n        <span 
class=\"k\">if<\/span> <span class=\"bp\">self<\/span><span class=\"p\">.<\/span><span class=\"n\">num<\/span> <span class=\"o\">==<\/span> <span class=\"bp\">self<\/span><span class=\"p\">.<\/span><span class=\"n\">limit_id<\/span><span class=\"p\">:<\/span> \r\n            <span class=\"k\">pass<\/span>\r\n        <span class=\"k\">else<\/span><span class=\"p\">:<\/span>\r\n            <span class=\"n\">url<\/span> <span class=\"o\">=<\/span> <span class=\"s\">'http:\/\/anikore.jp\/anime\/'<\/span> <span class=\"o\">+<\/span> <span class=\"nb\">str<\/span><span class=\"p\">(<\/span><span class=\"bp\">self<\/span><span class=\"p\">.<\/span><span class=\"n\">num<\/span><span class=\"p\">)<\/span> <span class=\"o\">+<\/span>  <span class=\"s\">'\/'<\/span>\r\n\r\n\r\n            <span class=\"n\">anime_id<\/span> <span class=\"o\">=<\/span> <span class=\"bp\">self<\/span><span class=\"p\">.<\/span><span class=\"n\">num<\/span>\r\n            <span class=\"n\">title<\/span> <span class=\"o\">=<\/span> <span class=\"n\">response<\/span><span class=\"p\">.<\/span><span class=\"n\">xpath<\/span><span class=\"p\">(<\/span><span class=\"s\">'\/\/*[@id=\"clm24\"]\/\/h2\/a[@class=\"blk_lnk\"]\/text()'<\/span><span class=\"p\">).<\/span><span class=\"n\">extract_first<\/span><span class=\"p\">()<\/span>\r\n            <span class=\"n\">point<\/span> <span class=\"o\">=<\/span> <span class=\"n\">response<\/span><span class=\"p\">.<\/span><span class=\"n\">xpath<\/span><span class=\"p\">(<\/span><span class=\"s\">'\/\/*[@id=\"main\"]\/div[2]\/div[2]\/div[1]\/div[1]\/span[2]\/text()'<\/span><span class=\"p\">).<\/span><span class=\"n\">extract<\/span><span class=\"p\">()<\/span>\r\n            <span class=\"n\">point_story<\/span> <span class=\"o\">=<\/span> <span class=\"n\">response<\/span><span class=\"p\">.<\/span><span class=\"n\">xpath<\/span><span class=\"p\">(<\/span><span class=\"s\">'\/\/*[@id=\"main\"]\/div[2]\/div[2]\/div[1]\/div[2]\/span[2]\/text()'<\/span><span 
class=\"p\">).<\/span><span class=\"n\">extract<\/span><span class=\"p\">()<\/span>\r\n            <span class=\"n\">point_animation<\/span> <span class=\"o\">=<\/span> <span class=\"n\">response<\/span><span class=\"p\">.<\/span><span class=\"n\">xpath<\/span><span class=\"p\">(<\/span><span class=\"s\">'\/\/*[@id=\"main\"]\/div[2]\/div[2]\/div[1]\/div[3]\/span[2]\/text()'<\/span><span class=\"p\">).<\/span><span class=\"n\">extract<\/span><span class=\"p\">()<\/span>\r\n            <span class=\"n\">point_vc<\/span> <span class=\"o\">=<\/span> <span class=\"n\">response<\/span><span class=\"p\">.<\/span><span class=\"n\">xpath<\/span><span class=\"p\">(<\/span><span class=\"s\">'\/\/*[@id=\"main\"]\/div[2]\/div[2]\/div[1]\/div[4]\/span[2]\/text()'<\/span><span class=\"p\">).<\/span><span class=\"n\">extract<\/span><span class=\"p\">()<\/span>\r\n            <span class=\"n\">point_music<\/span> <span class=\"o\">=<\/span> <span class=\"n\">response<\/span><span class=\"p\">.<\/span><span class=\"n\">xpath<\/span><span class=\"p\">(<\/span><span class=\"s\">'\/\/*[@id=\"main\"]\/div[2]\/div[2]\/div[1]\/div[5]\/span[2]\/text()'<\/span><span class=\"p\">).<\/span><span class=\"n\">extract<\/span><span class=\"p\">()<\/span>\r\n            <span class=\"n\">point_chara<\/span> <span class=\"o\">=<\/span> <span class=\"n\">response<\/span><span class=\"p\">.<\/span><span class=\"n\">xpath<\/span><span class=\"p\">(<\/span><span class=\"s\">'\/\/*[@id=\"main\"]\/div[2]\/div[2]\/div[1]\/div[6]\/span[2]\/text()'<\/span><span class=\"p\">).<\/span><span class=\"n\">extract<\/span><span class=\"p\">()<\/span>\r\n            <span class=\"n\">total_point<\/span> <span class=\"o\">=<\/span> <span class=\"n\">response<\/span><span class=\"p\">.<\/span><span class=\"n\">xpath<\/span><span class=\"p\">(<\/span><span class=\"s\">'\/\/*[@id=\"main\"]\/div[2]\/div[2]\/div[2]\/div[1]\/span[2]\/text()'<\/span><span class=\"p\">).<\/span><span class=\"n\">extract<\/span><span 
class=\"p\">()<\/span>\r\n            <span class=\"n\">review_num<\/span> <span class=\"o\">=<\/span> <span class=\"n\">response<\/span><span class=\"p\">.<\/span><span class=\"n\">xpath<\/span><span class=\"p\">(<\/span><span class=\"s\">'\/\/*[@id=\"main\"]\/div[2]\/div[2]\/div[2]\/div[2]\/span[2]\/text()'<\/span><span class=\"p\">).<\/span><span class=\"n\">extract<\/span><span class=\"p\">()<\/span>\r\n            <span class=\"n\">fav_num<\/span> <span class=\"o\">=<\/span> <span class=\"n\">response<\/span><span class=\"p\">.<\/span><span class=\"n\">xpath<\/span><span class=\"p\">(<\/span><span class=\"s\">'\/\/*[@id=\"main\"]\/div[2]\/div[2]\/div[2]\/div[3]\/span[2]\/text()'<\/span><span class=\"p\">).<\/span><span class=\"n\">extract<\/span><span class=\"p\">()<\/span>\r\n            <span class=\"n\">ranking<\/span> <span class=\"o\">=<\/span> <span class=\"n\">response<\/span><span class=\"p\">.<\/span><span class=\"n\">xpath<\/span><span class=\"p\">(<\/span><span class=\"s\">'\/\/*[@id=\"main\"]\/div[2]\/div[2]\/div[2]\/div[4]\/span[2]\/text()'<\/span><span class=\"p\">).<\/span><span class=\"n\">extract<\/span><span class=\"p\">()<\/span>\r\n            <span class=\"n\">summary<\/span> <span class=\"o\">=<\/span> <span class=\"n\">response<\/span><span class=\"p\">.<\/span><span class=\"n\">xpath<\/span><span class=\"p\">(<\/span><span class=\"s\">'\/\/*[@id=\"main\"]\/div[2]\/div[3]\/blockquote\/text()'<\/span><span class=\"p\">).<\/span><span class=\"n\">extract<\/span><span class=\"p\">()<\/span>\r\n\r\n            <span class=\"k\">print<\/span><span class=\"p\">(<\/span><span class=\"s\">\"____________________________________________________\"<\/span><span class=\"p\">)<\/span>\r\n            <span class=\"k\">print<\/span><span class=\"p\">(<\/span><span class=\"bp\">self<\/span><span class=\"p\">.<\/span><span class=\"n\">num<\/span><span class=\"p\">)<\/span>\r\n\r\n            <span class=\"bp\">self<\/span><span class=\"p\">.<\/span><span 
class=\"n\">num<\/span> <span class=\"o\">+=<\/span> <span class=\"mi\">1<\/span>\r\n\r\n\r\n            <span class=\"c1\"># if anime page exist\r\n<\/span>            <span class=\"k\">if<\/span> <span class=\"n\">title<\/span> <span class=\"ow\">is<\/span> <span class=\"ow\">not<\/span> <span class=\"bp\">None<\/span><span class=\"p\">:<\/span>\r\n\r\n\r\n                <span class=\"c1\">#output \r\n<\/span>                <span class=\"k\">yield<\/span> <span class=\"p\">{<\/span><span class=\"s\">\"anime_id\"<\/span><span class=\"p\">:<\/span><span class=\"n\">anime_id<\/span><span class=\"p\">,<\/span><span class=\"s\">\"title\"<\/span><span class=\"p\">:<\/span><span class=\"n\">title<\/span><span class=\"p\">,<\/span><span class=\"s\">\"point\"<\/span><span class=\"p\">:<\/span><span class=\"n\">point<\/span><span class=\"p\">,<\/span><span class=\"s\">\"point_story\"<\/span><span class=\"p\">:<\/span><span class=\"n\">point_story<\/span><span class=\"p\">,<\/span><span class=\"s\">\"point_animation\"<\/span><span class=\"p\">:<\/span><span class=\"n\">point_animation<\/span><span class=\"p\">,<\/span> \\\r\n                <span class=\"s\">\"point_vc\"<\/span><span class=\"p\">:<\/span><span class=\"n\">point_vc<\/span><span class=\"p\">,<\/span><span class=\"s\">\"point_music\"<\/span><span class=\"p\">:<\/span><span class=\"n\">point_music<\/span><span class=\"p\">,<\/span><span class=\"s\">\"point_chara\"<\/span><span class=\"p\">:<\/span><span class=\"n\">point_chara<\/span><span class=\"p\">,<\/span> \\\r\n                <span class=\"s\">\"total_point\"<\/span><span class=\"p\">:<\/span><span class=\"n\">total_point<\/span><span class=\"p\">,<\/span> <span class=\"s\">\"review_num\"<\/span><span class=\"p\">:<\/span><span class=\"n\">review_num<\/span><span class=\"p\">,<\/span> <span class=\"s\">\"fav_num\"<\/span><span class=\"p\">:<\/span><span class=\"n\">fav_num<\/span><span class=\"p\">,<\/span> \\\r\n                <span 
class=\"s\">\"ranking\"<\/span><span class=\"p\">:<\/span><span class=\"n\">ranking<\/span><span class=\"p\">,<\/span> <span class=\"s\">\"summary\"<\/span><span class=\"p\">:<\/span><span class=\"n\">summary<\/span><span class=\"p\">}<\/span>\r\n\r\n                <span class=\"c1\"># crawl next anime page\r\n<\/span>                <span class=\"n\">next_url<\/span> <span class=\"o\">=<\/span> <span class=\"s\">'http:\/\/anikore.jp\/anime\/'<\/span> <span class=\"o\">+<\/span> <span class=\"nb\">str<\/span><span class=\"p\">(<\/span><span class=\"bp\">self<\/span><span class=\"p\">.<\/span><span class=\"n\">num<\/span><span class=\"p\">)<\/span> <span class=\"o\">+<\/span>  <span class=\"s\">'\/'<\/span>\r\n                <span class=\"k\">yield<\/span> <span class=\"n\">Request<\/span><span class=\"p\">(<\/span><span class=\"n\">next_url<\/span><span class=\"p\">,<\/span> <span class=\"n\">callback<\/span><span class=\"o\">=<\/span><span class=\"bp\">self<\/span><span class=\"p\">.<\/span><span class=\"n\">parse<\/span><span class=\"p\">,<\/span> <span class=\"n\">dont_filter<\/span><span class=\"o\">=<\/span><span class=\"bp\">True<\/span><span class=\"p\">)<\/span>\r\n            <span class=\"c1\"># If animepage does not exist, redirect to homepage. 
So, title is None.\r\n<\/span>            <span class=\"k\">else<\/span><span class=\"p\">:<\/span>\r\n\r\n                <span class=\"c1\">#crawl next anime page\r\n<\/span>                <span class=\"n\">next_url<\/span> <span class=\"o\">=<\/span> <span class=\"s\">'http:\/\/anikore.jp\/anime\/'<\/span> <span class=\"o\">+<\/span> <span class=\"nb\">str<\/span><span class=\"p\">(<\/span><span class=\"bp\">self<\/span><span class=\"p\">.<\/span><span class=\"n\">num<\/span><span class=\"p\">)<\/span> <span class=\"o\">+<\/span>  <span class=\"s\">'\/'<\/span>\r\n                <span class=\"k\">yield<\/span> <span class=\"n\">Request<\/span><span class=\"p\">(<\/span><span class=\"n\">next_url<\/span><span class=\"p\">,<\/span> <span class=\"n\">callback<\/span><span class=\"o\">=<\/span><span class=\"bp\">self<\/span><span class=\"p\">.<\/span><span class=\"n\">parse<\/span><span class=\"p\">,<\/span> <span class=\"n\">dont_filter<\/span><span class=\"o\">=<\/span><span class=\"bp\">True<\/span><span class=\"p\">)<\/span>\r\n<\/code><\/pre>\n<p>\u4ee3\u7801\u8981\u70b9<br \/>\n1. \u8fd9\u6b21\u7684URL\u89c4\u5219\u662fhttps:\/\/www.anikore.jp\/anime\/[\u52a8\u6f2bID]\/\uff0c\u6240\u4ee5\u901a\u8fc7\u9012\u589e\u52a8\u6f2bID\u6765\u8bbf\u95ee\u6240\u6709\u52a8\u6f2b\u9875\u9762\u3002<br \/>\n2. \u4f7f\u7528response.xpath()\u8fdb\u884c\u5404\u79cd\u722c\u866b\u64cd\u4f5c\u3002<br \/>\n3. \u901a\u8fc7yield {&#8220;anime_id&#8221;:\u52a8\u6f2bID,&#8221;title&#8221;:\u6807\u9898,&#8230;,}\u6765\u8f93\u51fa\u722c\u53d6\u7684\u6570\u636e\u3002<br \/>\n4. 
\u901a\u8fc7yield Request(next_url, callback=self.parse, dont_filter=True)\u8df3\u8f6c\u5230\u4e0b\u4e00\u4e2a\u52a8\u6f2b\u9875\u9762\u3002<\/p>\n<h1>\u4e0eMongoDB\u7684\u8fde\u63a5<br \/>\nScrapy\u6709\u4e00\u4e2a\u53eb\u505apipeline\u7684\u7ed3\u6784\u3002\u5173\u4e8eScrapy\u7684\u7ed3\u6784\uff0c\u6211\u975e\u5e38\u60f3\u53c2\u8003@checkpoint\u5148\u751f\u7684\u8fd9\u7bc7\u6587\u7ae0\uff0c\u56e0\u4e3a\u5b83\u975e\u5e38\u6613\u61c2\u3002<br \/>\n\u4e0b\u9762\u662fpipeline.py\u7684\u4ee3\u7801\u3002<\/p>\n<pre class=\"post-pre\"><code>\r\n<span class=\"kn\">from<\/span> <span class=\"nn\">pymongo<\/span> <span class=\"kn\">import<\/span> <span class=\"n\">MongoClient<\/span>  <span class=\"c1\"># mongoDB \u3068\u306e\u63a5\u7d9a\r\n<\/span><span class=\"kn\">import<\/span> <span class=\"nn\">datetime<\/span>\r\n<span class=\"kn\">from<\/span> <span class=\"nn\">scrapy.conf<\/span> <span class=\"kn\">import<\/span> <span class=\"n\">settings<\/span>\r\n\r\n\r\n<span class=\"k\">class<\/span> <span class=\"nc\">MongoDBPipeline<\/span><span class=\"p\">(<\/span><span class=\"nb\">object<\/span><span class=\"p\">):<\/span>\r\n\r\n    <span class=\"k\">def<\/span> <span class=\"nf\">__init__<\/span><span class=\"p\">(<\/span><span class=\"bp\">self<\/span><span class=\"p\">):<\/span>\r\n        <span class=\"c1\"># \u30a4\u30f3\u30b9\u30bf\u30f3\u30b9\u751f\u6210\u6642\u306b\u6e21\u3055\u308c\u305f\u5f15\u6570\u3067\u3001\u5909\u6570\u521d\u671f\u5316\r\n<\/span>        <span class=\"n\">connection<\/span> <span class=\"o\">=<\/span> <span class=\"n\">MongoClient<\/span><span class=\"p\">(<\/span>\r\n            <span class=\"n\">settings<\/span><span class=\"p\">[<\/span><span class=\"s\">'MONGODB_SERVER'<\/span><span class=\"p\">],<\/span>\r\n            <span class=\"n\">settings<\/span><span class=\"p\">[<\/span><span class=\"s\">'MONGODB_PORT'<\/span><span class=\"p\">])<\/span>\r\n        <span class=\"n\">db<\/span> <span class=\"o\">=<\/span> <span 
class=\"n\">connection<\/span><span class=\"p\">[<\/span><span class=\"n\">settings<\/span><span class=\"p\">[<\/span><span class=\"s\">'MONGODB_DB'<\/span><span class=\"p\">]]<\/span>\r\n        <span class=\"bp\">self<\/span><span class=\"p\">.<\/span><span class=\"n\">collection<\/span> <span class=\"o\">=<\/span> <span class=\"n\">db<\/span><span class=\"p\">[<\/span><span class=\"n\">settings<\/span><span class=\"p\">[<\/span><span class=\"s\">'MONGODB_COLLECTION'<\/span><span class=\"p\">]]<\/span>\r\n\r\n\r\n\r\n    <span class=\"k\">def<\/span> <span class=\"nf\">process_item<\/span><span class=\"p\">(<\/span><span class=\"bp\">self<\/span><span class=\"p\">,<\/span> <span class=\"n\">item<\/span><span class=\"p\">,<\/span> <span class=\"n\">spider<\/span><span class=\"p\">):<\/span>\r\n        <span class=\"bp\">self<\/span><span class=\"p\">.<\/span><span class=\"n\">collection<\/span><span class=\"p\">.<\/span><span class=\"n\">insert<\/span><span class=\"p\">(<\/span><span class=\"nb\">dict<\/span><span class=\"p\">(<\/span><span class=\"n\">item<\/span><span class=\"p\">))<\/span>\r\n        <span class=\"k\">return<\/span> <span class=\"n\">item<\/span>\r\n<\/code><\/pre>\n<p>\u6709\u56db\u4e2a\u540d\u4e3aMONGODB_*\u7684\u53d8\u91cf\uff0c\u8fd9\u4e9b\u53d8\u91cf\u5728settings.py\u6587\u4ef6\u4e2d\u88ab\u5b9a\u4e49\u3002<\/p>\n<pre class=\"post-pre\"><code>\r\n\r\n\r\n<span class=\"c1\">#mongoDB settings\r\n<\/span><span class=\"n\">MONGODB_SERVER<\/span> <span class=\"o\">=<\/span> <span class=\"s\">'localhost'<\/span>\r\n<span class=\"n\">MONGODB_PORT<\/span> <span class=\"o\">=<\/span> <span class=\"mi\">27017<\/span>\r\n<span class=\"c1\">#db name\r\n<\/span><span class=\"n\">MONGODB_DB<\/span> <span class=\"o\">=<\/span> <span class=\"s\">'anikore'<\/span>\r\n<span class=\"c1\">#collection name\r\n<\/span><span class=\"n\">MONGODB_COLLECTION<\/span> <span class=\"o\">=<\/span> <span 
class=\"s\">\"users\"<\/span>\r\n\r\n<\/code><\/pre>\n<p>\u5728\u8fd9\u79cd\u60c5\u51b5\u4e0b\uff0c\u4f1a\u5728\u540d\u4e3aanikore\u7684\u6570\u636e\u5e93\u4e2d\u521b\u5efa\u4e00\u4e2a\u540d\u4e3ausers\u7684\u96c6\u5408\uff0c\u5e76\u5c06\u6570\u636e\u5b58\u5165\u5176\u4e2d\u3002<\/p>\n<h1>\u6267\u884c\u7ed3\u679c\u8bf7\u7528\u4e0b\u9762\u7684\u547d\u4ee4\u8fd0\u884c\u8718\u86db\u3002<\/p>\n<pre class=\"post-pre\"><code>scrapy crawl anime\r\n<\/code><\/pre>\n<p>\u4e5f\u53ef\u4ee5\u540c\u65f6\u5c06\u6570\u636e\u5199\u5165CSV\u6587\u4ef6\u4e2d\u3002<\/p>\n<pre class=\"post-pre\"><code>scrapy crawl anime -o anime.csv\r\n<\/code><\/pre>\n<p>\u7136\u540e\uff0c\u4f60\u5e94\u8be5\u80fd\u5728\u7ec8\u7aef\u4e2d\u611f\u53d7\u5230\u8718\u86db\u7684\u79fb\u52a8\u3002<br \/>\n\u63a5\u7740\uff0c\u5982\u679cMongoDB\u6570\u636e\u5df2\u7ecf\u4fdd\u5b58\u4e0b\u6765\uff0c\u90a3\u5c31\u662f\u6210\u529f\u4e86\u3002\u592a\u597d\u4e86\u3002<\/p>\n<h1>\u6b64\u5916\u5982\u679c\u6211\u4eec\u8fd0\u7528\u672c\u6b21\u4ecb\u7ecd\u7684\u722c\u866b\u6280\u5de7\uff0c\u4f3c\u4e4e\u53ef\u4ee5\u6536\u96c6\u4efb\u610f\u7f51\u7ad9\u7684\u6570\u636e\u3002\u7136\u800c\uff0c\u8fd9\u5e76\u975e\u5982\u6b64\u7b80\u5355\u7684\u73b0\u5b9e\u60c5\u51b5\u3002\u4f8b\u5982\uff0c\u7528\u6237\u7684\u6570\u636e\u53ea\u6709\u5728\u767b\u5f55\u540e\u624d\u80fd\u67e5\u770b\u3002\u4e5f\u5c31\u662f\u8bf4\uff0c\u4f5c\u4e3a\u722c\u866b\u7684\u529f\u80fd\uff0c\u6211\u4eec\u9700\u8981\u5728\u767b\u5f55\u9875\u9762\u8fdb\u884c\u767b\u5f55\u3002\u8fd9\u53ef\u4ee5\u901a\u8fc7Scrapy\u7684FormRequest\u529f\u80fd\u6765\u5b9e\u73b0\u3002<\/p>\n<p>\u611f\u8c22\u60a8\u7684\u9605\u8bfb\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u5728\u8fd9\u7bc7\u6587\u7ae0\u4e2d\u6211\u4eec\u53ef\u4ee5\u4e86\u89e3\u5230\u4ec0\u4e48\uff1f\u30fb\u4f7f\u7528Scrapy\u521b\u5efa\u57fa\u672c\u722c\u866b\u7684\u65b9\u6cd5 \u30fbScrapy\u548cMongoDB\u7684\u8fde\u63a5\u65b9\u5f0f 
[&hellip;]<\/p>\n","protected":false},"author":8,"featured_media":0,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[227],"class_list":["post-29685","post","type-post","status-publish","format-standard","hentry","category-uncategorized","tag-227"],"yoast_head":"<!-- This site is optimized with the Yoast SEO Premium plugin v21.5 (Yoast SEO v21.5) - https:\/\/yoast.com\/wordpress\/plugins\/seo\/ -->\n<title>Scrapy\u7528\u4e8e\u722c\u53d6\u52a8\u6f2b\u6570\u636e\u2461 - Blog - Silicon Cloud<\/title>\n<meta name=\"description\" content=\"\u5173\u4e8eScrapy\u7528\u4e8e\u722c\u53d6\u52a8\u6f2b\u6570\u636e\u2461\u7684\u6280\u672f\u6587\u7ae0\" \/>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/www.silicloud.com\/zh\/blog\/scrapy\u7528\u4e8e\u722c\u53d6\u52a8\u6f2b\u6570\u636e\u2461\u3002\/\" \/>\n<meta property=\"og:locale\" content=\"zh_CN\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"Scrapy\u7528\u4e8e\u722c\u53d6\u52a8\u6f2b\u6570\u636e\u2461\" \/>\n<meta property=\"og:description\" content=\"\u5173\u4e8eScrapy\u7528\u4e8e\u722c\u53d6\u52a8\u6f2b\u6570\u636e\u2461\u7684\u6280\u672f\u6587\u7ae0\" \/>\n<meta property=\"og:url\" content=\"https:\/\/www.silicloud.com\/zh\/blog\/scrapy\u7528\u4e8e\u722c\u53d6\u52a8\u6f2b\u6570\u636e\u2461\u3002\/\" \/>\n<meta property=\"og:site_name\" content=\"Blog - Silicon Cloud\" \/>\n<meta property=\"article:published_time\" content=\"2024-01-28T19:50:38+00:00\" \/>\n<meta property=\"article:modified_time\" content=\"2025-08-12T07:21:48+00:00\" \/>\n<meta property=\"og:image\" content=\"https:\/\/cdn.silicloud.com\/blog-img\/blog\/img\/657d038a37434c4406bd3df2\/5-0.png\" \/>\n<meta name=\"author\" content=\"\u96c5, \u609f\" \/>\n<meta name=\"twitter:card\" 
content=\"summary_large_image\" \/>\n<meta name=\"twitter:label1\" content=\"\u4f5c\u8005\" \/>\n\t<meta name=\"twitter:data1\" content=\"\u96c5, \u609f\" \/>\n\t<meta name=\"twitter:label2\" content=\"\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4\" \/>\n\t<meta name=\"twitter:data2\" content=\"1 \u5206\" \/>\n<script type=\"application\/ld+json\" class=\"yoast-schema-graph\">{\"@context\":\"https:\/\/schema.org\",\"@graph\":[{\"@type\":\"WebPage\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/scrapy%e7%94%a8%e4%ba%8e%e7%88%ac%e5%8f%96%e5%8a%a8%e6%bc%ab%e6%95%b0%e6%8d%ae%e2%91%a1%e3%80%82\/\",\"url\":\"https:\/\/www.silicloud.com\/zh\/blog\/scrapy%e7%94%a8%e4%ba%8e%e7%88%ac%e5%8f%96%e5%8a%a8%e6%bc%ab%e6%95%b0%e6%8d%ae%e2%91%a1%e3%80%82\/\",\"name\":\"Scrapy\u7528\u4e8e\u722c\u53d6\u52a8\u6f2b\u6570\u636e\u2461 - Blog - Silicon Cloud\",\"isPartOf\":{\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#website\"},\"datePublished\":\"2024-01-28T19:50:38+00:00\",\"dateModified\":\"2025-08-12T07:21:48+00:00\",\"author\":{\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/f044a4b7fa4ee2701702942002419ca6\"},\"description\":\"\u5173\u4e8eScrapy\u7528\u4e8e\u722c\u53d6\u52a8\u6f2b\u6570\u636e\u2461\u7684\u6280\u672f\u6587\u7ae0\",\"breadcrumb\":{\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/scrapy%e7%94%a8%e4%ba%8e%e7%88%ac%e5%8f%96%e5%8a%a8%e6%bc%ab%e6%95%b0%e6%8d%ae%e2%91%a1%e3%80%82\/#breadcrumb\"},\"inLanguage\":\"zh-Hans\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\/\/www.silicloud.com\/zh\/blog\/scrapy%e7%94%a8%e4%ba%8e%e7%88%ac%e5%8f%96%e5%8a%a8%e6%bc%ab%e6%95%b0%e6%8d%ae%e2%91%a1%e3%80%82\/\"]}]},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/scrapy%e7%94%a8%e4%ba%8e%e7%88%ac%e5%8f%96%e5%8a%a8%e6%bc%ab%e6%95%b0%e6%8d%ae%e2%91%a1%e3%80%82\/#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"\u9996\u9875\",\"item\":\"https:\/\/www.silicloud.com\/zh\/blog\/\"},{\"@type\
":\"ListItem\",\"position\":2,\"name\":\"Scrapy\u7528\u4e8e\u722c\u53d6\u52a8\u6f2b\u6570\u636e\u2461\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#website\",\"url\":\"https:\/\/www.silicloud.com\/zh\/blog\/\",\"name\":\"Blog - Silicon Cloud\",\"description\":\"\",\"inLanguage\":\"zh-Hans\"},{\"@type\":\"Person\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/f044a4b7fa4ee2701702942002419ca6\",\"name\":\"\u96c5, \u609f\",\"image\":{\"@type\":\"ImageObject\",\"inLanguage\":\"zh-Hans\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/image\/\",\"url\":\"https:\/\/secure.gravatar.com\/avatar\/e71a913e914f1aad1efc391f92084294bac54bc782acd289638580134cf667a6?s=96&d=mm&r=g\",\"contentUrl\":\"https:\/\/secure.gravatar.com\/avatar\/e71a913e914f1aad1efc391f92084294bac54bc782acd289638580134cf667a6?s=96&d=mm&r=g\",\"caption\":\"\u96c5, \u609f\"},\"url\":\"https:\/\/www.silicloud.com\/zh\/blog\/author\/yawu\/\"},{\"@type\":\"ImageObject\",\"inLanguage\":\"zh-Hans\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/scrapy%e7%94%a8%e4%ba%8e%e7%88%ac%e5%8f%96%e5%8a%a8%e6%bc%ab%e6%95%b0%e6%8d%ae%e2%91%a1%e3%80%82\/#local-main-organization-logo\",\"url\":\"\",\"contentUrl\":\"\",\"caption\":\"Blog - Silicon Cloud\"}]}<\/script>\n<!-- \/ Yoast SEO Premium plugin. 
-->","yoast_head_json":{"title":"Scrapy\u7528\u4e8e\u722c\u53d6\u52a8\u6f2b\u6570\u636e\u2461 - Blog - Silicon Cloud","description":"\u5173\u4e8eScrapy\u7528\u4e8e\u722c\u53d6\u52a8\u6f2b\u6570\u636e\u2461\u7684\u6280\u672f\u6587\u7ae0","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/www.silicloud.com\/zh\/blog\/scrapy\u7528\u4e8e\u722c\u53d6\u52a8\u6f2b\u6570\u636e\u2461\u3002\/","og_locale":"zh_CN","og_type":"article","og_title":"Scrapy\u7528\u4e8e\u722c\u53d6\u52a8\u6f2b\u6570\u636e\u2461","og_description":"\u5173\u4e8eScrapy\u7528\u4e8e\u722c\u53d6\u52a8\u6f2b\u6570\u636e\u2461\u7684\u6280\u672f\u6587\u7ae0","og_url":"https:\/\/www.silicloud.com\/zh\/blog\/scrapy\u7528\u4e8e\u722c\u53d6\u52a8\u6f2b\u6570\u636e\u2461\u3002\/","og_site_name":"Blog - Silicon Cloud","article_published_time":"2024-01-28T19:50:38+00:00","article_modified_time":"2025-08-12T07:21:48+00:00","og_image":[{"url":"https:\/\/cdn.silicloud.com\/blog-img\/blog\/img\/657d038a37434c4406bd3df2\/5-0.png"}],"author":"\u96c5, \u609f","twitter_card":"summary_large_image","twitter_misc":{"\u4f5c\u8005":"\u96c5, \u609f","\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4":"1 \u5206"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"WebPage","@id":"https:\/\/www.silicloud.com\/zh\/blog\/scrapy%e7%94%a8%e4%ba%8e%e7%88%ac%e5%8f%96%e5%8a%a8%e6%bc%ab%e6%95%b0%e6%8d%ae%e2%91%a1%e3%80%82\/","url":"https:\/\/www.silicloud.com\/zh\/blog\/scrapy%e7%94%a8%e4%ba%8e%e7%88%ac%e5%8f%96%e5%8a%a8%e6%bc%ab%e6%95%b0%e6%8d%ae%e2%91%a1%e3%80%82\/","name":"Scrapy\u7528\u4e8e\u722c\u53d6\u52a8\u6f2b\u6570\u636e\u2461 - Blog - Silicon 
Cloud","isPartOf":{"@id":"https:\/\/www.silicloud.com\/zh\/blog\/#website"},"datePublished":"2024-01-28T19:50:38+00:00","dateModified":"2025-08-12T07:21:48+00:00","author":{"@id":"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/f044a4b7fa4ee2701702942002419ca6"},"description":"\u5173\u4e8eScrapy\u7528\u4e8e\u722c\u53d6\u52a8\u6f2b\u6570\u636e\u2461\u7684\u6280\u672f\u6587\u7ae0","breadcrumb":{"@id":"https:\/\/www.silicloud.com\/zh\/blog\/scrapy%e7%94%a8%e4%ba%8e%e7%88%ac%e5%8f%96%e5%8a%a8%e6%bc%ab%e6%95%b0%e6%8d%ae%e2%91%a1%e3%80%82\/#breadcrumb"},"inLanguage":"zh-Hans","potentialAction":[{"@type":"ReadAction","target":["https:\/\/www.silicloud.com\/zh\/blog\/scrapy%e7%94%a8%e4%ba%8e%e7%88%ac%e5%8f%96%e5%8a%a8%e6%bc%ab%e6%95%b0%e6%8d%ae%e2%91%a1%e3%80%82\/"]}]},{"@type":"BreadcrumbList","@id":"https:\/\/www.silicloud.com\/zh\/blog\/scrapy%e7%94%a8%e4%ba%8e%e7%88%ac%e5%8f%96%e5%8a%a8%e6%bc%ab%e6%95%b0%e6%8d%ae%e2%91%a1%e3%80%82\/#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"\u9996\u9875","item":"https:\/\/www.silicloud.com\/zh\/blog\/"},{"@type":"ListItem","position":2,"name":"Scrapy\u7528\u4e8e\u722c\u53d6\u52a8\u6f2b\u6570\u636e\u2461"}]},{"@type":"WebSite","@id":"https:\/\/www.silicloud.com\/zh\/blog\/#website","url":"https:\/\/www.silicloud.com\/zh\/blog\/","name":"Blog - Silicon Cloud","description":"","inLanguage":"zh-Hans"},{"@type":"Person","@id":"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/f044a4b7fa4ee2701702942002419ca6","name":"\u96c5, \u609f","image":{"@type":"ImageObject","inLanguage":"zh-Hans","@id":"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/image\/","url":"https:\/\/secure.gravatar.com\/avatar\/e71a913e914f1aad1efc391f92084294bac54bc782acd289638580134cf667a6?s=96&d=mm&r=g","contentUrl":"https:\/\/secure.gravatar.com\/avatar\/e71a913e914f1aad1efc391f92084294bac54bc782acd289638580134cf667a6?s=96&d=mm&r=g","caption":"\u96c5, 
\u609f"},"url":"https:\/\/www.silicloud.com\/zh\/blog\/author\/yawu\/"},{"@type":"ImageObject","inLanguage":"zh-Hans","@id":"https:\/\/www.silicloud.com\/zh\/blog\/scrapy%e7%94%a8%e4%ba%8e%e7%88%ac%e5%8f%96%e5%8a%a8%e6%bc%ab%e6%95%b0%e6%8d%ae%e2%91%a1%e3%80%82\/#local-main-organization-logo","url":"","contentUrl":"","caption":"Blog - Silicon Cloud"}]}},"_links":{"self":[{"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/posts\/29685","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/users\/8"}],"replies":[{"embeddable":true,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/comments?post=29685"}],"version-history":[{"count":3,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/posts\/29685\/revisions"}],"predecessor-version":[{"id":111533,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/posts\/29685\/revisions\/111533"}],"wp:attachment":[{"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/media?parent=29685"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/categories?post=29685"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/tags?post=29685"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}