{"id":3667,"date":"2022-03-18T11:54:16","date_gmt":"2022-03-18T03:54:16","guid":{"rendered":"https:\/\/egonlin.com\/?p=3667"},"modified":"2022-03-18T11:54:16","modified_gmt":"2022-03-18T03:54:16","slug":"%e7%ac%ac%e4%b8%89%e8%8a%82%ef%bc%9apandas%e8%bf%9b%e9%98%b6%e4%b9%8b%e6%8f%90%e5%8d%87%e8%bf%90%e8%a1%8c%e6%95%88%e7%8e%87","status":"publish","type":"post","link":"https:\/\/egonlin.com\/?p=3667","title":{"rendered":"\u7b2c\u4e09\u8282\uff1apandas\u8fdb\u9636\u4e4b\u63d0\u5347\u8fd0\u884c\u6548\u7387"},"content":{"rendered":"<h1>\u524d\u8a00<\/h1>\n<p>\u5982\u679c\u4f60\u73b0\u5728\u6b63\u5728\u5b66\u4e60\u6570\u636e\u5206\u6790\uff0c\u6216\u8005\u6b63\u5728\u4ece\u4e8b\u6570\u636e\u5206\u6790\u884c\u4e1a\uff0c\u80af\u5b9a\u4f1a\u5904\u7406\u4e00\u4e9b\u5927\u6570\u636e\u96c6\u3002pandas\u5c31\u662f\u8fd9\u4e9b\u5927\u6570\u636e\u96c6\u7684\u4e00\u4e2a\u5f88\u597d\u7684\u5904\u7406\u5de5\u5177\u3002\u90a3\u4e48pandas\u5230\u5e95\u662f\u4ec0\u4e48\u5462\uff1f\u5b98\u65b9\u6587\u6863\u4e0a\u8bf4\uff1a<\/p>\n<blockquote>\n<p>&quot; 
<strong>\u5feb\u901f<\/strong>\uff0c<strong>\u7075\u6d3b<\/strong>\uff0c\u5bcc\u6709\u8868\u73b0\u529b\u7684\u6570\u636e\u7ed3\u6784\uff0c\u65e8\u5728\u4f7f\u201d\u5173\u7cfb\u201c\u6216\u201d\u6807\u8bb0\u201c\u6570\u636e\u7684\u4f7f\u7528\u65e2<strong>\u7b80\u5355<\/strong>\u53c8<strong>\u76f4\u89c2<\/strong>\u3002&quot;<\/p>\n<\/blockquote>\n<p>\u5feb\u901f\u3001\u7075\u6d3b\u3001\u7b80\u5355\u3001\u76f4\u89c2\uff01\u8fd9\u4e9b\u542c\u8d77\u6765\u611f\u89c9\u5f88\u68d2\u3002\u5982\u679c\u4f60\u7684\u5de5\u4f5c\u6d89\u53ca\u5230\u6784\u5efa\u590d\u6742\u7684\u6570\u636e\u6a21\u578b\uff0c\u4f60\u80af\u5b9a\u4e0d\u5e0c\u671b\u82b1\u8d39\u5927\u91cf\u7684\u5f00\u53d1\u65f6\u95f4\u7b49\u5f85\u6a21\u5757\u5904\u7406\u5927\u6570\u636e\u96c6\u3002\u6211\u4eec\u9700\u8981\u5c06\u5927\u91cf\u7684\u65f6\u95f4\u4e0e\u7cbe\u529b\u653e\u5728\u89e3\u91ca\u6570\u636e\u5f53\u4e2d\uff0c\u800c\u4e0d\u662f\u4f7f\u7528\u90a3\u4e9b\u529f\u80fd\u8f83\u5c11\u7684\u5de5\u5177\uff0c\u4e3a\u4e86\u5904\u7406\u6570\u636e\u800c\u715e\u8d39\u82e6\u5fc3\u3002<\/p>\n<h1>Pandas\u5904\u7406\u6570\u636e\u6162\uff1f<\/h1>\n<p>\u5728\u4f7f\u7528\u7684pandas\u7684\u8fc7\u7a0b\u4e2d\u6709\u4eba\u8bf4\uff0c\u867d\u7136\u4ed6\u662f\u4e00\u4e2a\u5f88\u597d\u7684\u89e3\u6790\u6570\u636e\u7684\u5de5\u5177\uff0c\u4f46\u662f\u56e0\u4e3a\u5b83\u7684\u901f\u5ea6\u592a\u6162\u4e86\uff0c\u65e0\u6cd5\u4f5c\u4e3a\u7edf\u8ba1\u5efa\u6a21\u5de5\u5177\u3002\u5bf9\u4e8e\u521d\u5b66\u8005\u5728\u81ea\u5df1\u7684\u4f7f\u7528\u5f53\u4e2d\u53ef\u80fd\u4f1a\u53d1\u73b0\uff0c\u5b83\u7684\u8fd0\u884c\u901f\u5ea6\uff0c\u5e76\u4e0d\u7b26\u5408\u4e00\u4e2a\u6570\u636e\u5206\u6790\u5de5\u5177\u7684\u6807\u51c6\u3002<\/p>\n<p>\u4f46\u662fPandas\u7684\u5f00\u53d1\u662f\u5efa\u7acb\u5728Numpy\u7684\u6570\u7ec4\u7ed3\u6784\u4e4b\u4e0a\u7684\uff0c\u5b83\u7684\u8bb8\u591a\u64cd\u4f5c\u90fd\u662f\u901a\u8fc7C\u8bed\u8a00\u5b9e\u73b0\u7684\uff0c\u57fa\u4e8eNumpy\u548cPandas\u81ea\u5df1\u7684\u62d3\u5c55\u6a21\u5757\u6765\u7f16\u5199\u768
4\uff0c\u8fd9\u4e9b\u6a21\u5757\u662fCython\u7f16\u5199\u7684\uff0c\u7f16\u8bd1\u6210C\u8bed\u8a00\u3002\u8fd9\u6837\u6765\u770b\uff0cpandas\u7684\u901f\u5ea6\u80af\u5b9a\u5feb\u7684\u3002<\/p>\n<p>\u4e8b\u5b9e\u8bc1\u660e\uff0c\u80af\u5b9a\u662f\uff0c\u4f46\u662f\u4f60\u5fc5\u987b\u6b63\u786e\u7684\u4f7f\u7528\u5b83\uff01<\/p>\n<p>\u672c\u6587\u4e0d\u662f\u8bb2\u5982\u4f55\u8fc7\u5ea6\u4f18\u5316Pandas\u7684\u4ee3\u7801\uff0c\u800c\u662f\u8bb2\u5982\u4f55\u6b63\u786e\u7684\u4f7f\u7528\u5b83\uff0c\u4e3b\u8981\u4ecb\u7ecd\u51e0\u79cdpandas\u4e2d\u5e38\u7528\u5230\u7684\u65b9\u6cd5\uff0c\u5bf9\u4e8e\u8fd9\u4e9b\u65b9\u6cd5\u7684\u4f7f\u7528\u5b58\u5728\u54ea\u4e9b\u9700\u8981\u6ce8\u610f\u7684\u95ee\u9898\uff0c\u4ee5\u53ca\u5982\u4f55\u5bf9\u5b83\u4eec\u8fdb\u884c\u901f\u5ea6\u4e0a\u7684\u63d0\u5347\u3002<\/p>\n<ul>\n<li>\u8bb2datetime\u6570\u636e\u4e0e\u65f6\u95f4\u5e8f\u5217\u4e00\u8d77\u4f7f\u7528\u7684\u4f18\u70b9<\/li>\n<li>\u8fdb\u884c\u6279\u91cf\u8ba1\u7b97\u7684\u6700\u6709\u6548\u7684\u9014\u5f84<\/li>\n<li>\u901a\u8fc7HDFStore\u5b58\u50a8\u6570\u636e\u8282\u7701\u65f6\u95f4<\/li>\n<\/ul>\n<h1>\u4f7f\u7528datetime\u6570\u636e\u8282\u7701\u65f6\u95f4<\/h1>\n<pre><code class=\"language-python\">import pandas as pd\nfrom timer import timeit\nimport time\nimport numpy as np<\/code><\/pre>\n<pre><code class=\"language-python\">pd.__version__<\/code><\/pre>\n<pre><code>'0.24.2'<\/code><\/pre>\n<pre><code class=\"language-python\">df = pd.read_csv(&#039;demand_profile.csv&#039;)\ndf.head()<\/code><\/pre>\n<\/style>\n<table border=\"1\" class=\"dataframe\">\n<thead>\n<tr style=\"text-align: right;\">\n<th><\/th>\n<th>date_time<\/th>\n<th>energy_kwh<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<th>0<\/th>\n<td>1\/1\/13 0:00<\/td>\n<td>0.586<\/td>\n<\/tr>\n<tr>\n<th>1<\/th>\n<td>1\/1\/13 1:00<\/td>\n<td>0.580<\/td>\n<\/tr>\n<tr>\n<th>2<\/th>\n<td>1\/1\/13 2:00<\/td>\n<td>0.572<\/td>\n<\/tr>\n<tr>\n<th>3<\/th>\n<td>1\/1\/13 
3:00<\/td>\n<td>0.596<\/td>\n<\/tr>\n<tr>\n<th>4<\/th>\n<td>1\/1\/13 4:00<\/td>\n<td>0.592<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/div>\n<p>\u4ece\u8fd0\u884c\u4e0a\u9762\u7684\u4ee3\u7801\u5f97\u5230\u7684\u7ed3\u679c\u6765\u770b\uff0c\u597d\u50cf\u6ca1\u6709\u4efb\u4f55\u95ee\u9898\u3002\u4f46\u662f\u5b9e\u9645\u4e0apandas\u548cnumpy\u90fd\u6709\u4e00\u4e2adtypes\u7684\u6982\u5ff5\u3002\u5982\u679c\u6ca1\u6709\u6307\u5b9a\u7684\u8bdd\uff0c\u90a3\u4e48date_time\u5c06\u4f1a\u4f7f\u7528\u4e00\u4e2aobject\u7684dtype\u7c7b\u578b\uff0c\u5982\u4e0b\uff1a<\/p>\n<pre><code class=\"language-python\">df.dtypes<\/code><\/pre>\n<pre><code>date_time      object\nenergy_kwh    float64\ndtype: object<\/code><\/pre>\n<pre><code class=\"language-python\">type(df.iat[0,0])<\/code><\/pre>\n<pre><code>str<\/code><\/pre>\n<p>object\u7c7b\u578b\u5c31\u50cf\u4e00\u4e2a\u5927\u7684\u5bb9\u5668\uff0c\u4e0d\u4ec5\u4ec5\u53ef\u4ee5\u627f\u8f7dstr\u7c7b\u578b\uff0c\u4e5f\u53ef\u4ee5\u5305\u542b\u90a3\u4e9b\u4e0d\u80fd\u5f88\u597d\u5730\u878d\u8fdb\u4e00\u4e2a\u6570\u636e\u7c7b\u578b\u7684\u4efb\u4f55\u7279\u5f81\u5217\u3002\u800c\u5982\u679c\u6211\u4eec\u5c06\u65e5\u671f\u4f5c\u4e3astr\u7c7b\u578b\u5c31\u4f1a\u6781\u5927\u7684\u5f71\u54cd\u6548\u7387\u3002<br \/>\n\u56e0\u6b64\u9488\u5bf9\u65f6\u95f4\u5e8f\u5217\u7684\u6570\u636e\u800c\u8a00\uff0c\u6211\u4eec\u9700\u8981\u5c06date_time\u5217\u7684\u683c\u5f0f\u8f6c\u6362\u4e3adatetime\u5bf9\u8c61\u6570\u7ec4\uff08pandas\u79f0\u4e4b\u4e3aTimestamp\uff09\u3002\u5177\u4f53\u64cd\u4f5c\u5982\u4e0b\uff1a<\/p>\n<pre><code class=\"language-python\">df[&#039;date_time&#039;] = pd.to_datetime(df[&#039;date_time&#039;])\nprint(df[&#039;date_time&#039;].dtype)\ndf[&#039;date_time&#039;].head()<\/code><\/pre>\n<pre><code>datetime64[ns]\n\n0   2013-01-01 00:00:00\n1   2013-01-01 01:00:00\n2   2013-01-01 02:00:00\n3   2013-01-01 03:00:00\n4   2013-01-01 04:00:00\nName: date_time, dtype: 
datetime64[ns]<\/code><\/pre>\n<p>\u4ee5\u4e0a\u5c31\u662fdate_time\u5217\u8f6c\u6362\u7c7b\u578b\u7684\u64cd\u4f5c\u4ee5\u53ca\u8f6c\u6362\u540e\u7684\u6548\u679c\u3002 <\/p>\n<p>\u63a5\u4e0b\u6765\u6211\u4eec\u81ea\u5b9a\u4e49\u4e00\u4e2a<code>@timeit<\/code>\u88c5\u9970\u5668\u8fdb\u884c\u6d4b\u8bd5\uff0c\u5b83\u53ef\u4ee5\u8fd4\u56de\u51fd\u6570\u7684\u8fd0\u884c\u7ed3\u679c\u5e76\u4ece\u591a\u6b21\u5b9e\u9a8c\u4e2d\u6253\u5370\u5b83\u7684\u5e73\u5747\u8fd0\u884c\u65f6\u95f4\u3002<\/p>\n<pre><code class=\"language-python\">@timeit(repeat = 3, number = 10)\ndef convert(df, column_name):\n    return pd.to_datetime(df[column_name])\n\ndf[&#039;date_time&#039;] = convert(df, &#039;date_time&#039;)<\/code><\/pre>\n<pre><code class=\"language-python\">Best of 3 trials with 10 function calls per trial:  \nFunction `convert` ran in average of 0.785 seconds.<\/code><\/pre>\n<p>\u5b9e\u9645\u8fd0\u884c\u901f\u5ea60.785s\uff0c\u770b\u4e0a\u53bb\u975e\u5e38\u5feb\u4e86\uff0c\u4f46\u662f\u5176\u5b9e\u8fd8\u53ef\u4ee5\u66f4\u5feb<\/p>\n<pre><code class=\"language-python\">@timeit(repeat = 3, number = 10)\ndef convert_with_format(df, column_name):\n    return pd.to_datetime(df[column_name],format=&quot;%d\/%m\/%y %H:%M&quot;)\n\ndf[&#039;date_time&#039;] = convert_with_format(df, &#039;date_time&#039;)<\/code><\/pre>\n<pre><code>Best of 3 trials with 10 function calls per trial:  \nFunction <code>convert<\/code> ran in average of 0.022 seconds.<\/code><\/pre>\n<p>\u7ed3\u679c\u53ea\u67090.022s\uff0c\u5feb\u4e86\u5c06\u8fd135\u500d\u3002\u539f\u56e0\u662f\uff1a\u6211\u4eec\u8bbe\u7f6e\u4e86\u8f6c\u5316\u7684\u683c\u5f0fformat\u3002\u7531\u4e8e\u5728CSV\u4e2d\u7684datetimes\u5e76\u4e0d\u662fISO 8601\u683c\u5f0f\u7684\uff0c\u5982\u679c\u4e0d\u8fdb\u884c\u8bbe\u7f6e\u7684\u8bdd\uff0c\u90a3\u4e48pandas\u5c06\u4f7f\u7528dateutil\u5305\u628a\u6bcf\u4e2a\u5b57\u7b26\u4e32str\u8f6c\u5316\u6210date\u65e5\u671f\u3002<br 
\/>\n\u76f8\u53cd\uff0c\u5982\u679c\u539f\u59cb\u6570\u636edatetime\u5df2\u7ecf\u662fISO 8601\u683c\u5f0f\u4e86\uff0c\u90a3\u4e48pandas\u5c31\u53ef\u4ee5\u7acb\u5373\u4f7f\u7528\u6700\u5feb\u901f\u7684\u65b9\u6cd5\u6765\u89e3\u6790\u65e5\u671f\u3002\u8fd9\u4e5f\u5c31\u662f\u4e3a\u4ec0\u4e48\u63d0\u524d\u8bbe\u7f6e\u597d\u683c\u5f0fformat\u53ef\u4ee5\u63d0\u5347\u8fd9\u4e48\u591a\u7684\u901f\u5ea6<\/p>\n<h1>Pandas\u6570\u636e\u7684\u5faa\u73af\u64cd\u4f5c<\/h1>\n<p>\u4ecd\u7136\u57fa\u4e8e\u4e0a\u9762\u7684\u6570\u636e\uff0c\u6211\u4eec\u518d\u6dfb\u52a0\u4e00\u4e2a\u65b0\u7684\u7279\u5f81\uff0c\u4f46\u662f\u8fd9\u4e2a\u65b0\u7684\u7279\u5f81\u662f\u57fa\u4e8e\u4e00\u4e9b\u65f6\u95f4\u6761\u4ef6\uff0c\u6839\u636e\u65f6\u957f\u800c\u53d8\u5316\uff0c\u5982\u4e0b\uff1a<\/p>\n<pre><code class=\"language-python\">data = pd.DataFrame({&#039;Tariff Type&#039;:[&#039;Peak&#039;,&#039;Shoulder&#039;,&#039;Off-Peak&#039;],\n            &#039;Cents per kWh&#039;:[&#039;28&#039;,&#039;20&#039;,&#039;12&#039;],\n            &#039;Time Range&#039;:[&#039;17:00 to 24:00&#039;,&#039;7:00 to 17:00&#039;,&#039;0:00 to 7:00&#039;]})\ndata<\/code><\/pre>\n<\/style>\n<table border=\"1\" class=\"dataframe\">\n<thead>\n<tr style=\"text-align: right;\">\n<th><\/th>\n<th>Tariff Type<\/th>\n<th>Cents per kWh<\/th>\n<th>Time Range<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<th>0<\/th>\n<td>Peak<\/td>\n<td>28<\/td>\n<td>17:00 to 24:00<\/td>\n<\/tr>\n<tr>\n<th>1<\/th>\n<td>Shoulder<\/td>\n<td>20<\/td>\n<td>7:00 to 17:00<\/td>\n<\/tr>\n<tr>\n<th>2<\/th>\n<td>Off-Peak<\/td>\n<td>12<\/td>\n<td>0:00 to 7:00<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/div>\n<p>\u56e0\u6b64\uff0c\u6309\u7167\u6211\u4eec\u6b63\u5e38\u7684\u505a\u6cd5\u5c31\u662f\u4f7f\u7528apply\u65b9\u6cd5\u5199\u4e00\u4e2a\u51fd\u6570\uff0c\u51fd\u6570\u91cc\u9762\u5199\u597d\u65f6\u95f4\u6761\u4ef6\u7684\u903b\u8f91\u4ee3\u7801<\/p>\n<pre><code class=\"language-python\">def apply_tariff(kwh, hour):\n    &quot;&quot;&quot;\n    
\u8ba1\u7b97\u6bcf\u4e2a\u5c0f\u65f6\u7684\u7535\u8d39\n    &quot;&quot;&quot;\n    if 0 &lt;= hour &lt; 7:\n        rate = 12\n    elif 7 &lt;= hour &lt; 17:\n        rate = 20\n    elif 17 &lt;= hour &lt; 24:\n        rate = 28\n    else:\n        raise ValueError(f&quot;Invalid hour:{hour}&quot;)\n    return rate * kwh<\/code><\/pre>\n<p>\u901a\u8fc7for\u5faa\u73af\u6765\u904d\u5386df\uff0c\u6839\u636eapply\u51fd\u6570\u903b\u8f91\u6dfb\u52a0\u65b0\u7684\u7279\u5f81\uff0c\u5982\u4e0b\uff1a<\/p>\n<pre><code class=\"language-python\">@timeit(repeat = 3, number = 100)\ndef apply_tariff_loop(df):\n    &quot;&quot;&quot;\n    \u5faa\u73af\u8ba1\u7b97\uff0c\u4fee\u6539df\u7684\u7279\u5f81\n    &quot;&quot;&quot;\n    energy_cost_list = []\n    for i in range(len(df)):\n        energy_used = df.iloc[i][&#039;energy_kwh&#039;]\n        hour = df.iloc[i][&#039;date_time&#039;].hour\n        energy_cost = apply_tariff(energy_used, hour)\n        energy_cost_list.append(energy_cost)\n    df[&#039;cost_cents&#039;] = energy_cost_list\n\napply_tariff_loop(df)<\/code><\/pre>\n<pre><code>Best of 3 trials with 100 function calls per trial:\nFunction <code>apply_tariff_loop<\/code> ran in average of 2.291 
seconds.<\/code><\/pre>\n<p>\u5bf9\u4e8e\u90a3\u4e9b\u7ecf\u5e38\u4f7f\u7528python\u7f16\u7a0b\u7684coder\u6765\u8bf4\uff0c\u8fd9\u4e2a\u8bbe\u8ba1\u770b\u8d77\u6765\u975e\u5e38\u7684\u81ea\u7136\u3002\u4f46\u662f\u8fd9\u4e2a\u5faa\u73af\u4f1a\u4e25\u91cd\u5f71\u54cd\u6548\u7387\uff0c\u4f60\u8981\u662f\u505a\u6570\u636e\u5206\u6790\uff0c\u80af\u5b9a\u662f\u4e0d\u8d5e\u6210\u8fd9\u4e48\u505a\u7684<\/p>\n<ul>\n<li>1\u3001\u5b83\u9700\u8981\u521d\u59cb\u5316\u4e00\u4e2a\u5c06\u8bb0\u5f55\u8f93\u51fa\u7684\u5217\u8868<\/li>\n<li>2\u3001\u5b83\u4f7f\u7528\u4e0d\u900f\u660e\u5bf9\u8c61<code>range(0,len(df))<\/code>\u5faa\u73af\uff0c\u7136\u540e\u5728\u5e94\u7528apply_tariff()\u4e4b\u540e\uff0c\u5b83\u5fc5\u987b\u5c06\u7ed3\u679c\u9644\u52a0\u5230\u7528\u4e8e\u521b\u5efa\u65b0DataFrame\u5217\u7684\u5217\u8868\u4e2d\u3002\u5b83\u8fd8\u4f7f\u7528df.iloc [i] [&#8216;date_time&#8217;]\u6267\u884c\u6240\u8c13\u7684\u94fe\u5f0f\u7d22\u5f15\uff0c\u8fd9\u901a\u5e38\u4f1a\u5bfc\u81f4\u610f\u5916\u7684\u7ed3\u679c\u3002<\/li>\n<li>3\u3001\u8fd9\u79cd\u65b9\u6cd5\u6700\u5927\u7684\u95ee\u9898\u662f\u8ba1\u7b97\u7684\u65f6\u95f4\u6210\u672c\u3002\u5bf9\u4e8e\u516b\u5343\u591a\u884c\u7684\u6570\u636e\uff0c\u4f7f\u7528\u5faa\u73af\u82b1\u4e862.29\u79d2\u949f\u3002<\/li>\n<\/ul>\n<h1>\u4f7f\u7528itertuples()\u548citerrows()\u5faa\u73af<\/h1>\n<p>\u53ef\u80fd\u6709\u4e9b\u540c\u5b66\u770b\u5230\u8fd9\u4e24\u4e2a\u65b9\u6cd5\u6709\u4e9b\u719f\u6089\uff0c\u6211\u4eec\u901a\u8fc7pandas\u5bfc\u5165itertuples\u548citerrows\u65b9\u6cd5\u53ef\u4ee5\u4f7f\u6548\u7387\u66f4\u5feb\u3002\u8fd9\u4e9b\u90fd\u662f\u4e00\u6b21\u4ea7\u751f\u4e00\u884c\u7684\u751f\u6210\u5668\u65b9\u6cd5\uff0c\u7c7b\u4f3c\u4e8epython\u751f\u6210\u5668\u4e2d\u7684yield\u7528\u6cd5  
<\/p>\n<p>.itertuples\u4e3a\u6bcf\u4e00\u884c\u4ea7\u751f\u4e00\u4e2anamedtuple\uff0c\u5e76\u4e14\u884c\u7684\u7d22\u5f15\u503c\u4f5c\u4e3a\u5143\u7ec4\u7684\u7b2c\u4e00\u4e2a\u5143\u7d20\u3002namedtuple\u662fPython\u7684collections\u6a21\u5757\u4e2d\u7684\u4e00\u79cd\u6570\u636e\u7ed3\u6784\uff0c\u5176\u884c\u4e3a\u7c7b\u4f3c\u4e8ePython\u5143\u7ec4\uff0c\u4f46\u5177\u6709\u53ef\u901a\u8fc7\u5c5e\u6027\u67e5\u627e\u8bbf\u95ee\u7684\u5b57\u6bb5\u3002  <\/p>\n<p>.iterrows\u4e3aDataFrame\u4e2d\u7684\u6bcf\u4e00\u884c\u4ea7\u751f\uff08index\uff0cseries\uff09\u8fd9\u6837\u7684\u5143\u7ec4\u3002  <\/p>\n<p>\u867d\u7136.itertuples\u5f80\u5f80\u4f1a\u66f4\u5feb\u4e00\u4e9b\uff0c\u4f46\u662f\u5728\u8fd9\u4e2a\u4f8b\u5b50\u4e2d\u4f7f\u7528.iterrows\uff0c\u6211\u4eec\u770b\u770b\u8fd9\u4f7f\u7528iterrows\u540e\u6548\u679c\u5982\u4f55\u3002  <\/p>\n<pre><code class=\"language-python\">@timeit(repeat = 3, number = 100)\ndef apply_tariff_iterrows(df):\n    energy_cost_list = []\n    for index, row in df.iterrows():\n         # \u83b7\u53d6\u7528\u7535\u91cf\u548c\u65f6\u95f4\uff08\u5c0f\u65f6\uff09\n        energy_used = row[&#039;energy_kwh&#039;]\n        hour = row[&#039;date_time&#039;].hour\n        # \u6dfb\u52a0cost\u5217\u8868\n        energy_cost = apply_tariff(energy_used, hour)\n        energy_cost_list.append(energy_cost)\n    df[&#039;cost_cents&#039;] = energy_cost_list\n\napply_tariff_iterrows(df)<\/code><\/pre>\n<pre><code>Best of 3 trials with 100 function calls per trial:\nFunction <code>apply_tariff_iterrows<\/code> ran in average of 0.674 seconds.<\/code><\/pre>\n<p>\u8bed\u6cd5\u65b9\u9762\uff1a\u8fd9\u79cd\u7c7b\u578b\u7684\u8bed\u6cd5\u66f4\u52a0\u660e\u786e\uff0c\u5e76\u4e14\u884c\u503c\u5f15\u7528\u4e2d\u7684\u6df7\u4e71\u66f4\u5c11\uff0c\u56e0\u6b64\u5b83\u66f4\u5177\u6709\u53ef\u8bfb\u6027  
<\/p>\n<p>\u65f6\u95f4\u6536\u76ca\u65b9\u9762\uff1a\u5feb\u4e86\u5c06\u8fd13.5\u500d\uff0c\u4f46\u662f\u6211\u4eec\u8fd8\u6709\u6539\u8fdb\u7684\u7a7a\u95f4\u3002\u56e0\u4e3a\u6211\u4eec\u4f9d\u7136\u5728\u4f7f\u7528python\u7684for\u5faa\u73af\uff0c\u8fd9\u5c31\u610f\u5473\u7740\u6bcf\u4e2a\u51fd\u6570\u8c03\u7528\u90fd\u662f\u5728python\u4e2d\u5b8c\u6210\u7684\uff0c\u7406\u60f3\u60c5\u51b5\u662f\u5b83\u53ef\u4ee5\u7528pandas\u5185\u90e8\u67b6\u6784\u4e2d\u5185\u7f6e\u7684\u66f4\u5feb\u7684\u8bed\u8a00\u5b8c\u6210<\/p>\n<h1>Pandas\u7684.apply()\u65b9\u6cd5<\/h1>\n<p>\u4f7f\u7528.apply\u65b9\u6cd5\u800c\u4e0d\u662f.iterrows\u8fdb\u4e00\u6b65\u6539\u8fdb\u6b64\u64cd\u4f5c\u3002Pandas\u7684.apply\u65b9\u6cd5\u63a5\u53d7\u51fd\u6570(callables)\u5e76\u6cbfDataFrame\u7684\u8f74(\u6240\u6709\u884c\u6216\u6240\u6709\u5217)\u5e94\u7528\u5b83\u4eec\u3002\u5728\u6b64\u793a\u4f8b\u4e2d\uff0clambda\u51fd\u6570\u5c06\u5e2e\u52a9\u4f60\u5c06\u4e24\u5217\u6570\u636e\u4f20\u9012\u7ed9apply_tariff()\uff1a<\/p>\n<pre><code class=\"language-python\">@timeit(repeat = 3, number = 100)\ndef apply_tariff_withapply(df):\n    df[&#039;cost_cents&#039;] = df.apply(\n        lambda row:apply_tariff(\n            kwh = row[&#039;energy_kwh&#039;],\n            hour = row[&#039;date_time&#039;].hour),\n            axis = 1)\napply_tariff_withapply(df)<\/code><\/pre>\n<pre><code>Best of 3 trials with 100 function calls per trial:\nFunction <code>apply_tariff_withapply<\/code> ran in average of 0.141 
seconds.<\/code><\/pre>\n<p>.apply\u7684\u8bed\u6cd5\u4f18\u70b9\u5f88\u660e\u663e\uff0c\u884c\u6570\u5c11\uff0c\u4ee3\u7801\u53ef\u8bfb\u6027\u9ad8\u3002\u5728\u8fd9\u79cd\u60c5\u51b5\u4e0b\uff0c\u6240\u82b1\u8d39\u7684\u65f6\u95f4\u5927\u7ea6\u662f.iterrows\u65b9\u6cd5\u7684\u4e94\u5206\u4e4b\u4e00\u3002<\/p>\n<p>\u4f46\u662f\uff0c\u8fd9\u8fd8\u4e0d\u662f\u201c\u975e\u5e38\u5feb\u201d\u3002\u4e00\u4e2a\u539f\u56e0\u662f.apply()\u5c06\u5728\u5185\u90e8\u5c1d\u8bd5\u5faa\u73af\u904d\u5386Cython\u8fed\u4ee3\u5668\u3002\u4f46\u662f\u5728\u8fd9\u79cd\u60c5\u51b5\u4e0b\uff0c\u4f20\u9012\u7684lambda\u4e0d\u662f\u53ef\u4ee5\u5728Cython\u4e2d\u5904\u7406\u7684\u4e1c\u897f\uff0c\u56e0\u6b64\u5b83\u5728Python\u4e2d\u8c03\u7528\uff0c\u56e0\u6b64\u5e76\u4e0d\u662f\u90a3\u4e48\u5feb\u3002<\/p>\n<p>\u5982\u679c\u4f60\u4f7f\u7528.apply()\u83b7\u53d610\u5e74\u7684\u5c0f\u65f6\u6570\u636e\uff0c\u90a3\u4e48\u4f60\u5c06\u9700\u8981\u5927\u7ea615\u5206\u949f\u7684\u5904\u7406\u65f6\u95f4\u3002\u5982\u679c\u8fd9\u4e2a\u8ba1\u7b97\u53ea\u662f\u5927\u578b\u6a21\u578b\u7684\u4e00\u5c0f\u90e8\u5206\uff0c\u90a3\u4e48\u4f60\u771f\u7684\u5e94\u8be5\u52a0\u5feb\u901f\u5ea6\u3002\u8fd9\u4e5f\u5c31\u662f\u77e2\u91cf\u5316\u64cd\u4f5c\u6d3e\u4e0a\u7528\u573a\u7684\u5730\u65b9\u3002<\/p>\n<h1>\u77e2\u91cf\u5316\u64cd\u4f5c\uff1a\u4f7f\u7528.isin()\u9009\u62e9\u6570\u636e<\/h1>\n<p>\u77e2\u91cf\u5316\u64cd\u4f5c\u8fd9\u4e2a\u4e1c\u897f\u662f\u5b66\u4e60numpy\u3001pandas\u6bd4\u8f83\u57fa\u7840\u7684\u4e00\u4e2a\u77e5\u8bc6\u70b9\uff0c\u5728\u8fd9\u8fb9\u5c31\u4e0d\u505a\u8fc7\u591a\u4ecb\u7ecd\u4e86\u3002<br \/>\n\u5b83\u662fpandas\u4e2d\u6267\u884c\u7684\u6700\u5feb\u65b9\u6cd5\u3002<\/p>\n<pre><code class=\"language-python\"># df.set_index(&#039;date_time&#039;,inplace=True)\n\n@timeit(repeat=3,number=100)\ndef apply_tariff_isin(df):\n    peak_hours = df.index.hour.isin(range(17,24))\n    shoulder_hours = df.index.hour.isin(range(7,17))\n    off_peak_hours = df.index.hour.isin(range(0,7))\n\n    
df.loc[peak_hours, &#039;cost_cents&#039;] = df.loc[peak_hours, &#039;energy_kwh&#039;] * 28\n    df.loc[shoulder_hours, &#039;cost_cents&#039;] = df.loc[shoulder_hours, &#039;energy_kwh&#039;] * 20\n    df.loc[off_peak_hours, &#039;cost_cents&#039;] = df.loc[off_peak_hours, &#039;energy_kwh&#039;] * 12\n\napply_tariff_isin(df)<\/code><\/pre>\n<pre><code>Best of 3 trials with 100 function calls per trial:\nFunction <code>apply_tariff_isin<\/code> ran in average of 0.003 seconds.<\/code><\/pre>\n<p>isin()\u65b9\u6cd5\u8fd4\u56de\u7684\u662f\u4e00\u4e2a\u5e03\u5c14\u503c\u6570\u7ec4\uff0c\u5982\u4e0b\u6240\u793a\uff1a<\/p>\n<pre><code class=\"language-python\">[False, False, False, ...,True ,True ,True]<\/code><\/pre>\n<p>\u8fd9\u4e9b\u503c\u6807\u8bc6\u54ea\u4e9bDataFrame\u7d22\u5f15(datetimes)\u843d\u5728\u6307\u5b9a\u7684\u5c0f\u65f6\u8303\u56f4\u5185\u3002\u7136\u540e\uff0c\u5f53\u4f60\u5c06\u8fd9\u4e9b\u5e03\u5c14\u6570\u7ec4\u4f20\u9012\u7ed9DataFrame\u7684.loc\u7d22\u5f15\u5668\u65f6\uff0c\u4f60\u5c06\u83b7\u5f97\u4e00\u4e2a\u4ec5\u5305\u542b\u4e0e\u8fd9\u4e9b\u5c0f\u65f6\u5339\u914d\u7684\u884c\u7684DataFrame\u5207\u7247\u3002\u5728\u90a3\u4e4b\u540e\uff0c\u4ec5\u4ec5\u662f\u5c06\u5207\u7247\u4e58\u4ee5\u9002\u5f53\u7684\u8d39\u7387\uff0c\u8fd9\u662f\u4e00\u79cd\u5feb\u901f\u7684\u77e2\u91cf\u5316\u64cd\u4f5c\u3002<\/p>\n<p>\u8fd9\u4e0e\u6211\u4eec\u4e0a\u9762\u7684\u5faa\u73af\u64cd\u4f5c\u76f8\u6bd4\u5982\u4f55\uff1f\u9996\u5148\uff0c\u4f60\u53ef\u80fd\u4f1a\u6ce8\u610f\u5230\u4e0d\u518d\u9700\u8981apply_tariff()\uff0c\u56e0\u4e3a\u6240\u6709\u6761\u4ef6\u903b\u8f91\u90fd\u5e94\u7528\u4e8e\u884c\u7684\u9009\u62e9\u3002\u56e0\u6b64\uff0c\u4f60\u5fc5\u987b\u7f16\u5199\u7684\u4ee3\u7801\u884c\u548c\u8c03\u7528\u7684Python\u4ee3\u7801\u4f1a\u5927\u5927\u51cf\u5c11\u3002<\/p>\n<p>\u5904\u7406\u65f6\u95f4\u600e\u4e48\u6837\uff1f\u6bd4\u4e0d\u662fPythonic\u7684\u5faa\u73af\u5feb764\u500d\uff0c\u6bd4.iterrows\u5feb224\u500d\uff0c\u6bd4.apply\u5feb47\u500d\u3002<\/p>
\n<h1>\u54b1\u4eec\u8fd8\u80fd\u505a\u7684\u66f4\u597d\u5417<\/h1>\n<p>\u5176\u5b9e\u901a\u8fc7\u4ee5\u4e0a\u7684\u5bf9\u6bd4\uff0c\u6211\u4eec\u4f1a\u53d1\u73b0\u5b83\u7684\u65f6\u95f4\u6548\u7387\u8fdb\u6b65\u662f\u975e\u5e38\u5927\u7684\uff0c\u4f46\u662f\u8fd9\u5c31\u5b8c\u4e86\u5417\uff1f <\/p>\n<p>\u5728apply_tariff_isin\u4e2d\uff0c\u6211\u4eec\u4ecd\u7136\u53ef\u4ee5\u901a\u8fc7\u8c03\u7528df.loc\u548cdf.index.hour.isin\u4e09\u6b21\u6765\u8fdb\u884c\u4e00\u4e9b\u201c\u624b\u52a8\u5de5\u4f5c\u201d\u3002\u5982\u679c\u6211\u4eec\u6709\u66f4\u7cbe\u7ec6\u7684\u65f6\u9699\u8303\u56f4\uff0c\u4f60\u53ef\u80fd\u4f1a\u4e89\u8fa9\u8bf4\u8fd9\u4e2a\u89e3\u51b3\u65b9\u6848\u662f\u4e0d\u53ef\u6269\u5c55\u7684\u3002\u5e78\u8fd0\u7684\u662f\uff0c\u5728\u8fd9\u79cd\u60c5\u51b5\u4e0b\uff0c\u4f60\u53ef\u4ee5\u4f7f\u7528Pandas\u7684pd.cut()\u51fd\u6570\u8fdb\u884c\u79bb\u6563\u5316\u5206\u7bb1\uff0c\u4ee5\u7f16\u7a0b\u65b9\u5f0f\u6267\u884c\u66f4\u591a\u64cd\u4f5c\uff1a<\/p>\n<pre><code class=\"language-python\">@timeit(repeat=3, number=100)\ndef apply_tariff_cut(df):\n    cents_per_kwh = pd.cut(x=df.index.hour,\n                           bins=[0, 7, 17, 24],\n                           include_lowest=True,\n                           labels=[12, 20, 28]).astype(int)\n    df[&#039;cost_cents&#039;] = cents_per_kwh * df[&#039;energy_kwh&#039;]\n\napply_tariff_cut(df)<\/code><\/pre>\n<pre><code>Best of 3 trials with 100 function calls per trial:\nFunction <code>apply_tariff_cut<\/code> ran in average of 0.001 seconds.<\/code><\/pre>\n<p>\u5230\u76ee\u524d\u4e3a\u6b62\uff0c\u65f6\u95f4\u4e0a\u57fa\u672c\u5df2\u7ecf\u5feb\u8981\u5230\u8fbe\u6781\u9650\u4e86\uff0c\u53ea\u82b1\u8d39\u4e860.001\u79d2\u7684\u65f6\u95f4\u6765\u5904\u7406\u5b8c\u6574\u7684\u5341\u5e74\u7684\u5c0f\u65f6\u6570\u636e\u96c6\u3002\u6700\u540e\u4e00\u4e2a\u9009\u9879\u662f\u4f7f\u7528 NumPy 
\u51fd\u6570\u6765\u64cd\u4f5c\u6bcf\u4e2aDataFrame\u7684\u5e95\u5c42NumPy\u6570\u7ec4\uff0c\u7136\u540e\u5c06\u7ed3\u679c\u96c6\u6210\u56dePandas\u6570\u636e\u7ed3\u6784\u4e2d\u3002<\/p>\n<h1>\u7ed3\u5408numpy\u7ee7\u7eed\u52a0\u901f<\/h1>\n<p>\u6211\u4eec\u5728\u73a9pandas\u7684\u65f6\u5019\u80af\u5b9a\u4e0d\u80fd\u5fd8\u8bb0\u7684\u4e00\u70b9\u5c31\u662f\uff0c\u5b83\u662f\u57fa\u4e8enumpy\u8fdb\u884c\u7f16\u5199\u7684\uff0c\u5b83\u6700\u5e38\u7528\u7684\u4e24\u79cd\u6570\u636e\u7ed3\u6784<code>DataFrame<\/code>\u548c<code>Series<\/code>\u90fd\u662f\u5728numpy\u5e93\u4e4b\u4e0a\u8bbe\u8ba1\u7684\u3002\u8fd9\u4f7f\u5f97\u6211\u4eec\u8ba1\u7b97\u66f4\u52a0\u7075\u6d3b\uff0c\u56e0\u4e3apandas\u53ef\u4ee5\u548cnumpy\u7684\u77e9\u9635\u548c\u64cd\u4f5c\u65e0\u7f1d\u8854\u63a5  <\/p>\n<p>\u63a5\u4e0b\u6765\uff0c\u6211\u4eec\u5c06\u4f7f\u7528NumPy\u7684 digitize() \u51fd\u6570\u3002\u5b83\u7c7b\u4f3c\u4e8ePandas\u7684cut()\uff0c\u56e0\u4e3a\u6570\u636e\u5c06\u88ab\u5206\u7bb1\uff0c\u4f46\u8fd9\u6b21\u5b83\u5c06\u7531\u4e00\u4e2a\u7d22\u5f15\u6570\u7ec4\u8868\u793a\uff0c\u8fd9\u4e9b\u7d22\u5f15\u8868\u793a\u6bcf\u5c0f\u65f6\u6240\u5c5e\u7684bin\u3002\u7136\u540e\u5c06\u8fd9\u4e9b\u7d22\u5f15\u5e94\u7528\u4e8e\u4ef7\u683c\u6570\u7ec4\uff1a<\/p>\n<pre><code class=\"language-python\">@timeit(repeat=3, number=100)\ndef apply_tariff_digitize(df):\n    prices = np.array([12, 20, 28])\n    bins = np.digitize(df.index.hour.values, bins=[7, 17, 24])\n    df[&#039;cost_cents&#039;] = prices[bins] * df[&#039;energy_kwh&#039;].values\n\napply_tariff_digitize(df)<\/code><\/pre>\n<pre><code>Best of 3 trials with 100 function calls per trial:\nFunction <code>apply_tariff_digitize<\/code> ran in average of 0.001 seconds.<\/code><\/pre>\n<p>\u4e0epandas\u7684cut()\u51fd\u6570\u4e00\u6837\uff0c\u8fd9\u79cd\u8bed\u6cd5\u975e\u5e38\u7b80\u6d01\u6613\u8bfb\u3002  
<\/p>\n<p>\u8fd0\u884c\u540e\u65f6\u95f4\u663e\u793a\u4f9d\u7136\u662f0.001s\uff0c\u5176\u5b9e\u6027\u80fd\u662f\u6709\u63d0\u5347\u7684\uff0c\u53ea\u4e0d\u8fc7\u65f6\u95f4\u663e\u793a\u7684\u95ee\u9898\u3002\u4f7f\u7528pandas\uff0c\u53ef\u4ee5\u5e2e\u52a9\u7ef4\u6301<code>\u5c42\u6b21\u7ed3\u6784<\/code>,\u5982\u679c\u4f60\u60f3\u7684\u8bdd\uff0c\u53ef\u4ee5\u50cf\u5728\u6b64\u5904\u4e00\u6837\u8fdb\u884c\u6279\u91cf\u8ba1\u7b97\uff0c<\/p>\n<ul>\n<li>\u4f7f\u7528\u5411\u91cf\u5316\u64cd\u4f5c\uff1a\u6ca1\u6709for\u5faa\u73af\u7684Pandas\u65b9\u6cd5\u548c\u51fd\u6570\u3002<\/li>\n<li>\u5c06.apply\u65b9\u6cd5\uff1a\u4e0e\u53ef\u8c03\u7528\u65b9\u6cd5\u4e00\u8d77\u4f7f\u7528\u3002<\/li>\n<li>\u4f7f\u7528.itertuples\uff1a\u4ecePython\u7684\u96c6\u5408\u6a21\u5757\u8fed\u4ee3DataFrame\u884c\u4f5c\u4e3anamedTuples\u3002<\/li>\n<li>\u4f7f\u7528.iterrows\uff1a\u8fed\u4ee3DataFrame\u884c\u4f5c\u4e3a(index\uff0cSeries)\u5bf9\u3002\u867d\u7136Pandas\u7cfb\u5217\u662f\u4e00\u79cd\u7075\u6d3b\u7684\u6570\u636e\u7ed3\u6784\uff0c\u4f46\u5c06\u6bcf\u4e00\u884c\u6784\u5efa\u5230\u4e00\u4e2a\u7cfb\u5217\u4e2d\u7136\u540e\u8bbf\u95ee\u5b83\u53ef\u80fd\u4f1a\u5f88\u6602\u8d35\u3002<\/li>\n<li>\u4f7f\u7528\u201celement-by-element\u201d\u5faa\u73af\uff1a\u4f7f\u7528df.loc\u6216df.iloc\u4e00\u6b21\u66f4\u65b0\u4e00\u4e2a\u5355\u5143\u683c\u6216\u884c\u3002<\/li>\n<\/ul>\n<blockquote>\n<p>\u4e0a\u9762\u7684\u4f18\u5148\u987a\u5e8f\u662fpandas\u5f00\u53d1\u4eba\u5458\u7684\u5efa\u8bae<\/p>\n<\/blockquote>\n<h1>\u4f7f\u7528HDFStore\u9632\u6b62\u91cd\u65b0\u5904\u7406<\/h1>\n<p>\u73b0\u5728\u4f60\u5df2\u7ecf\u4e86\u89e3\u4e86Pandas\u4e2d\u7684\u52a0\u901f\u6570\u636e\u6d41\u7a0b\uff0c\u63a5\u7740\u8ba9\u6211\u4eec\u63a2\u8ba8\u5982\u4f55\u907f\u514d\u4e0e\u6700\u8fd1\u96c6\u6210\u5230Pandas\u4e2d\u7684HDFStore\u4e00\u8d77\u91cd\u65b0\u5904\u7406\u65f6\u95f4\u3002<\/p>\n<p>\u901a\u5e38\uff0c\u5728\u6784\u5efa\u590d\u6742\u6570\u636e\u6a21\u578b\u65f6\uff0c\u53ef\u4ee5\u65b9\u4fbf\u5730\u5bf9\
u6570\u636e\u8fdb\u884c\u4e00\u4e9b\u9884\u5904\u7406\u3002\u4f8b\u5982\uff0c\u5982\u679c\u60a8\u670910\u5e74\u7684\u5206\u949f\u9891\u7387\u8017\u7535\u91cf\u6570\u636e\uff0c\u5373\u4f7f\u4f60\u6307\u5b9a\u683c\u5f0f\u53c2\u6570\uff0c\u53ea\u9700\u5c06\u65e5\u671f\u548c\u65f6\u95f4\u8f6c\u6362\u4e3a\u65e5\u671f\u65f6\u95f4\u53ef\u80fd\u9700\u898120\u5206\u949f\u3002\u4f60\u771f\u7684\u53ea\u60f3\u505a\u4e00\u6b21\uff0c\u800c\u4e0d\u662f\u6bcf\u6b21\u8fd0\u884c\u4f60\u7684\u6a21\u578b\uff0c\u8fdb\u884c\u6d4b\u8bd5\u6216\u5206\u6790\u3002<\/p>\n<p>\u4f60\u53ef\u4ee5\u5728\u6b64\u5904\u6267\u884c\u7684\u4e00\u9879\u975e\u5e38\u6709\u7528\u7684\u64cd\u4f5c\u662f\u9884\u5904\u7406\uff0c\u7136\u540e\u5c06\u6570\u636e\u5b58\u50a8\u5728\u5df2\u5904\u7406\u7684\u8868\u5355\u4e2d\uff0c\u4ee5\u4fbf\u5728\u9700\u8981\u65f6\u4f7f\u7528\u3002\u4f46\u662f\uff0c\u5982\u4f55\u4ee5\u6b63\u786e\u7684\u683c\u5f0f\u5b58\u50a8\u6570\u636e\u800c\u65e0\u9700\u518d\u6b21\u91cd\u65b0\u5904\u7406\uff1f\u5982\u679c\u4f60\u8981\u53e6\u5b58\u4e3aCSV\uff0c\u5219\u53ea\u4f1a\u4e22\u5931datetimes\u5bf9\u8c61\uff0c\u5e76\u4e14\u5728\u518d\u6b21\u8bbf\u95ee\u65f6\u5fc5\u987b\u91cd\u65b0\u5904\u7406\u5b83\u3002<\/p>\n<p>Pandas\u6709\u4e00\u4e2a\u5185\u7f6e\u7684\u89e3\u51b3\u65b9\u6848\uff0c\u5b83\u4f7f\u7528 HDF5\uff0c\u8fd9\u662f\u4e00\u79cd\u4e13\u95e8\u7528\u4e8e\u5b58\u50a8\u8868\u683c\u6570\u636e\u9635\u5217\u7684\u9ad8\u6027\u80fd\u5b58\u50a8\u683c\u5f0f\u3002 Pandas\u7684 HDFStore \u7c7b\u5141\u8bb8\u4f60\u5c06DataFrame\u5b58\u50a8\u5728HDF5\u6587\u4ef6\u4e2d\uff0c\u4ee5\u4fbf\u53ef\u4ee5\u6709\u6548\u5730\u8bbf\u95ee\u5b83\uff0c\u540c\u65f6\u4ecd\u4fdd\u7559\u5217\u7c7b\u578b\u548c\u5176\u4ed6\u5143\u6570\u636e\u3002\u5b83\u662f\u4e00\u4e2a\u7c7b\u4f3c\u5b57\u5178\u7684\u7c7b\uff0c\u56e0\u6b64\u60a8\u53ef\u4ee5\u50cf\u8bfb\u53d6Python dict\u5bf9\u8c61\u4e00\u6837\u8fdb\u884c\u8bfb\u5199\u3002<\/p>\n<p>\u4ee5\u4e0b\u662f\u5c06\u9884\u5904\u7406\u7535\u529b\u6d88\u8017DataFrame 
df\u5b58\u50a8\u5728HDF5\u6587\u4ef6\u4e2d\u7684\u65b9\u6cd5\uff1a<\/p>\n<pre><code class=\"language-python\"># \u521b\u5efa\u50a8\u5b58\u5bf9\u8c61\uff0c\u5e76\u5b58\u4e3a processed_data\ndata_store = pd.HDFStore(&#039;processed_data.h5&#039;)\n\n# \u5c06 DataFrame \u653e\u8fdb\u5bf9\u8c61\u4e2d\uff0c\u5e76\u8bbe\u7f6e key \u4e3a preprocessed_df\ndata_store[&#039;preprocessed_df&#039;] = df\ndata_store.close()<\/code><\/pre>\n<p>\u73b0\u5728\uff0c\u4f60\u53ef\u4ee5\u5173\u95ed\u8ba1\u7b97\u673a\u5e76\u4f11\u606f\u4e00\u4e0b\u3002\u7b49\u4f60\u56de\u6765\u7684\u65f6\u5019\uff0c\u4f60\u5904\u7406\u7684\u6570\u636e\u5c06\u5728\u4f60\u9700\u8981\u65f6\u4e3a\u4f60\u6240\u7528\uff0c\u800c\u65e0\u9700\u518d\u6b21\u52a0\u5de5\u3002\u4ee5\u4e0b\u662f\u5982\u4f55\u4eceHDF5\u6587\u4ef6\u8bbf\u95ee\u6570\u636e\uff0c\u5e76\u4fdd\u7559\u6570\u636e\u7c7b\u578b\uff1a<\/p>\n<pre><code class=\"language-python\"># \u83b7\u53d6\u6570\u636e\u50a8\u5b58\u5bf9\u8c61\ndata_store = pd.HDFStore(&#039;processed_data.h5&#039;)\n\n# \u901a\u8fc7key\u83b7\u53d6\u6570\u636e\npreprocessed_df = data_store[&#039;preprocessed_df&#039;]\ndata_store.close()<\/code><\/pre>\n<p>\u6570\u636e\u5b58\u50a8\u53ef\u4ee5\u5bb9\u7eb3\u591a\u4e2a\u8868\uff0c\u6bcf\u4e2a\u8868\u7684\u540d\u79f0\u4f5c\u4e3a\u952e\u3002<\/p>\n<p>\u5173\u4e8e\u5728Pandas\u4e2d\u4f7f\u7528HDFStore\u7684\u6ce8\u610f\u4e8b\u9879\uff1a\u60a8\u9700\u8981\u5b89\u88c5PyTables&gt; = 3.0.0\uff0c\u56e0\u6b64\u5728\u5b89\u88c5Pandas\u4e4b\u540e\uff0c\u8bf7\u786e\u4fdd\u66f4\u65b0PyTables\uff0c\u5982\u4e0b\u6240\u793a\uff1a<\/p>\n<pre><code class=\"language-python\"># pip install --upgrade 
tables<\/code><\/pre>\n<h1>\u7ed3\u8bba<\/h1>\n<p>\u5982\u679c\u4f60\u89c9\u5f97\u4f60\u7684Pandas\u9879\u76ee\u4e0d\u591f\u5feb\u901f\uff0c\u7075\u6d3b\uff0c\u7b80\u5355\u548c\u76f4\u89c2\uff0c\u8bf7\u8003\u8651\u91cd\u65b0\u8003\u8651\u4f60\u4f7f\u7528\u8be5\u5e93\u7684\u65b9\u5f0f\u3002<\/p>\n<p>\u8fd9\u91cc\u63a2\u8ba8\u7684\u793a\u4f8b\u76f8\u5f53\u7b80\u5355\uff0c\u4f46\u8bf4\u660e\u4e86Pandas\u529f\u80fd\u7684\u6b63\u786e\u5e94\u7528\u5982\u4f55\u80fd\u591f\u5927\u5927\u6539\u8fdb\u8fd0\u884c\u65f6\u548c\u901f\u5ea6\u7684\u4ee3\u7801\u53ef\u8bfb\u6027\u3002\u4ee5\u4e0b\u662f\u4e00\u4e9b\u7ecf\u9a8c\uff0c\u53ef\u4ee5\u5728\u4e0b\u6b21\u4f7f\u7528Pandas\u4e2d\u7684\u5927\u578b\u6570\u636e\u96c6\u65f6\u5e94\u7528\u8fd9\u4e9b\u7ecf\u9a8c\u6cd5\u5219\uff1a<\/p>\n<p>\u5c1d\u8bd5\u5c3d\u53ef\u80fd\u4f7f\u7528\u77e2\u91cf\u5316\u64cd\u4f5c\uff0c\u800c\u4e0d\u662f\u5728df \u4e2d\u89e3\u51b3for x\u7684\u95ee\u9898\u3002\u5982\u679c\u4f60\u7684\u4ee3\u7801\u662f\u8bb8\u591afor\u5faa\u73af\uff0c\u90a3\u4e48\u5b83\u53ef\u80fd\u66f4\u9002\u5408\u4f7f\u7528\u672c\u673aPython\u6570\u636e\u7ed3\u6784\uff0c\u56e0\u4e3aPandas\u4f1a\u5e26\u6765\u5f88\u591a\u5f00\u9500\u3002<br \/>\n\u5982\u679c\u4f60\u6709\u66f4\u590d\u6742\u7684\u64cd\u4f5c\uff0c\u5176\u4e2d\u77e2\u91cf\u5316\u6839\u672c\u4e0d\u53ef\u80fd\u6216\u592a\u96be\u4ee5\u6709\u6548\u5730\u89e3\u51b3\uff0c\u8bf7\u4f7f\u7528.apply\u65b9\u6cd5\u3002<br \/>\n\u5982\u679c\u5fc5\u987b\u5faa\u73af\u904d\u5386\u6570\u7ec4\uff08\u786e\u5b9e\u53d1\u751f\u4e86\u8fd9\u79cd\u60c5\u51b5\uff09\uff0c\u8bf7\u4f7f\u7528.iterrows()\u6216.itertuples()\u6765\u63d0\u9ad8\u901f\u5ea6\u548c\u8bed\u6cd5\u3002<br 
\/>\nPandas\u6709\u5f88\u591a\u53ef\u9009\u6027\uff0c\u51e0\u4e4e\u603b\u6709\u51e0\u79cd\u65b9\u6cd5\u53ef\u4ee5\u4eceA\u5230B\u3002\u8bf7\u6ce8\u610f\u8fd9\u4e00\u70b9\uff0c\u6bd4\u8f83\u4e0d\u540c\u65b9\u6cd5\u7684\u6267\u884c\u65b9\u5f0f\uff0c\u5e76\u9009\u62e9\u5728\u9879\u76ee\u73af\u5883\u4e2d\u6548\u679c\u6700\u4f73\u7684\u8def\u7ebf\u3002<br \/>\n\u4e00\u65e6\u5efa\u7acb\u4e86\u6570\u636e\u6e05\u7406\u811a\u672c\uff0c\u5c31\u53ef\u4ee5\u901a\u8fc7\u4f7f\u7528HDFStore\u5b58\u50a8\u4e2d\u95f4\u7ed3\u679c\u6765\u907f\u514d\u91cd\u65b0\u5904\u7406\u3002<br \/>\n\u5c06NumPy\u96c6\u6210\u5230Pandas\u64cd\u4f5c\u4e2d\u901a\u5e38\u53ef\u4ee5\u63d0\u9ad8\u901f\u5ea6\u5e76\u7b80\u5316\u8bed\u6cd5\u3002<\/p>\n<blockquote>\n<p>\u53c2\u8003:<a href=\"https:\/\/realpython.com\/fast-flexible-pandas\/\">https:\/\/realpython.com\/fast-flexible-pandas\/<\/a><\/p>\n<\/blockquote>\n","protected":false},"excerpt":{"rendered":"<p>\u524d\u8a00 \u5982\u679c\u4f60\u73b0\u5728\u6b63\u5728\u5b66\u4e60\u6570\u636e\u5206\u6790\uff0c\u6216\u8005\u6b63\u5728\u4ece\u4e8b\u6570\u636e\u5206\u6790\u884c\u4e1a\uff0c\u80af\u5b9a\u4f1a\u5904\u7406\u4e00\u4e9b\u5927\u6570\u636e\u96c6\u3002pandas\u5c31\u662f\u8fd9\u4e9b\u5927\u6570\u636e 
[&hellip;]<\/p>\n","protected":false},"author":3,"featured_media":0,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":[],"categories":[313,310],"tags":[],"_links":{"self":[{"href":"https:\/\/egonlin.com\/index.php?rest_route=\/wp\/v2\/posts\/3667"}],"collection":[{"href":"https:\/\/egonlin.com\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/egonlin.com\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/egonlin.com\/index.php?rest_route=\/wp\/v2\/users\/3"}],"replies":[{"embeddable":true,"href":"https:\/\/egonlin.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=3667"}],"version-history":[{"count":0,"href":"https:\/\/egonlin.com\/index.php?rest_route=\/wp\/v2\/posts\/3667\/revisions"}],"wp:attachment":[{"href":"https:\/\/egonlin.com\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=3667"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/egonlin.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=3667"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/egonlin.com\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=3667"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}