{"id":3289,"date":"2022-02-27T14:35:01","date_gmt":"2022-02-27T06:35:01","guid":{"rendered":"https:\/\/egonlin.com\/?p=3289"},"modified":"2022-02-27T14:35:01","modified_gmt":"2022-02-27T06:35:01","slug":"%e7%ac%ac%e5%85%ad%e8%8a%82%ef%bc%9a%e7%bb%86%e5%88%86%e6%9e%84%e5%bb%ba%e6%9c%ba%e5%99%a8%e5%ad%a6%e4%b9%a0%e5%ba%94%e7%94%a8%e7%a8%8b%e5%ba%8f%e7%9a%84%e6%b5%81%e7%a8%8b-%e6%95%b0%e6%8d%ae%e9%a2%84","status":"publish","type":"post","link":"https:\/\/egonlin.com\/?p=3289","title":{"rendered":"\u7b2c\u516d\u8282\uff1a\u7ec6\u5206\u6784\u5efa\u673a\u5668\u5b66\u4e60\u5e94\u7528\u7a0b\u5e8f\u7684\u6d41\u7a0b-\u6570\u636e\u9884\u5904\u7406"},"content":{"rendered":"<h1>\u7ec6\u5206\u6784\u5efa\u673a\u5668\u5b66\u4e60\u5e94\u7528\u7a0b\u5e8f\u7684\u6d41\u7a0b-\u6570\u636e\u9884\u5904\u7406<\/h1>\n<p>sklearn\u6570\u636e\u9884\u5904\u7406\u5b98\u65b9\u6587\u6863\u5730\u5740\uff1a<a href=\"https:\/\/scikit-learn.org\/stable\/modules\/classes.html#module-sklearn.preprocessing\">https:\/\/scikit-learn.org\/stable\/modules\/classes.html#module-sklearn.preprocessing<\/a><\/p>\n<h1>1.1 \u7f3a\u5931\u503c\u5904\u7406<\/h1>\n<pre><code class=\"language-python\">import numpy as np\nimport pandas as pd\nimport matplotlib.pyplot as plt\nfrom matplotlib.font_manager import FontProperties\nfrom sklearn import datasets\n%matplotlib inline\nfont = 
FontProperties(fname=&#039;\/Library\/Fonts\/Heiti.ttc&#039;)<\/code><\/pre>\n<p>&emsp;&emsp;\u73b0\u5b9e\u751f\u6d3b\u4e2d\u7684\u6570\u636e\u5f80\u5f80\u662f\u4e0d\u5168\u9762\u7684\uff0c\u5f88\u591a\u6837\u672c\u7684\u5c5e\u6027\u503c\u4f1a\u6709\u7f3a\u5931\uff0c\u4f8b\u5982\u67d0\u4e2a\u4eba\u586b\u5199\u7684\u4e2a\u4eba\u4fe1\u606f\u4e0d\u5b8c\u6574\u6216\u8005\u5bf9\u4e2a\u4eba\u9690\u79c1\u7684\u4fdd\u62a4\u653f\u7b56\u5bfc\u81f4\u5efa\u6a21\u65f6\u53ef\u80fd\u65e0\u6cd5\u5f97\u5230\u6240\u9700\u8981\u7684\u7279\u5f81\uff0c\u5c24\u5176\u662f\u5728\u6570\u636e\u91cf\u8f83\u5927\u65f6\uff0c\u8fd9\u79cd\u7f3a\u5931\u503c\u7684\u4ea7\u751f\u4f1a\u5bf9\u6a21\u578b\u7684\u6027\u80fd\u9020\u6210\u5f88\u5927\u7684\u5f71\u54cd\u3002\u63a5\u4e0b\u6765\u5c06\u901a\u8fc7\u9e22\u5c3e\u82b1\u6570\u636e\u8ba8\u8bba\u7f3a\u5931\u503c\u5904\u7406\u7684\u65b9\u6cd5\u3002<\/p>\n<pre><code class=\"language-python\"># \u7f3a\u5931\u503c\u5904\u7406\u793a\u4f8b\nfrom io import StringIO\n\niris_data = &#039;&#039;&#039;\n5.1,,1.4,0.2\n4.9,3.0,1.4,0.2\n4.7,3.2,,0.2\n7.0,3.2,4.7,1.4\n6.4,3.2,4.5,1.5\n6.9,3.1,4.9,\n,,,\n&#039;&#039;&#039;\n\niris = datasets.load_iris()\ndf = pd.read_csv(StringIO(iris_data), header=None)\ndf.columns = iris.feature_names\ndf = df.iloc[:, :4]\ndf<\/code><\/pre>\n<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }<\/p>\n<p>    .dataframe tbody tr th {\n        vertical-align: top;\n    }<\/p>\n<p>    .dataframe thead th {\n        text-align: right;\n    }\n<\/style>\n<table border=\"1\" class=\"dataframe\">\n<thead>\n<tr style=\"text-align: right;\">\n<th><\/th>\n<th>sepal length (cm)<\/th>\n<th>sepal width (cm)<\/th>\n<th>petal length (cm)<\/th>\n<th>petal width 
(cm)<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<th>0<\/th>\n<td>5.1<\/td>\n<td>NaN<\/td>\n<td>1.4<\/td>\n<td>0.2<\/td>\n<\/tr>\n<tr>\n<th>1<\/th>\n<td>4.9<\/td>\n<td>3.0<\/td>\n<td>1.4<\/td>\n<td>0.2<\/td>\n<\/tr>\n<tr>\n<th>2<\/th>\n<td>4.7<\/td>\n<td>3.2<\/td>\n<td>NaN<\/td>\n<td>0.2<\/td>\n<\/tr>\n<tr>\n<th>3<\/th>\n<td>7.0<\/td>\n<td>3.2<\/td>\n<td>4.7<\/td>\n<td>1.4<\/td>\n<\/tr>\n<tr>\n<th>4<\/th>\n<td>6.4<\/td>\n<td>3.2<\/td>\n<td>4.5<\/td>\n<td>1.5<\/td>\n<\/tr>\n<tr>\n<th>5<\/th>\n<td>6.9<\/td>\n<td>3.1<\/td>\n<td>4.9<\/td>\n<td>NaN<\/td>\n<\/tr>\n<tr>\n<th>6<\/th>\n<td>NaN<\/td>\n<td>NaN<\/td>\n<td>NaN<\/td>\n<td>NaN<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/div>\n<h2>1.1.1 \u5220\u9664\u7f3a\u5931\u503c<\/h2>\n<pre><code class=\"language-python\"># axis=0\u5220\u9664\u6709NaN\u503c\u7684\u884c\uff0caxis=1\u5220\u9664\u6709NaN\u503c\u7684\u5217\ndf.dropna(axis=0)<\/code><\/pre>\n<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }<\/p>\n<p>    .dataframe tbody tr th {\n        vertical-align: top;\n    }<\/p>\n<p>    .dataframe thead th {\n        text-align: right;\n    }\n<\/style>\n<table border=\"1\" class=\"dataframe\">\n<thead>\n<tr style=\"text-align: right;\">\n<th><\/th>\n<th>sepal length (cm)<\/th>\n<th>sepal width (cm)<\/th>\n<th>petal length (cm)<\/th>\n<th>petal width (cm)<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<th>1<\/th>\n<td>4.9<\/td>\n<td>3.0<\/td>\n<td>1.4<\/td>\n<td>0.2<\/td>\n<\/tr>\n<tr>\n<th>3<\/th>\n<td>7.0<\/td>\n<td>3.2<\/td>\n<td>4.7<\/td>\n<td>1.4<\/td>\n<\/tr>\n<tr>\n<th>4<\/th>\n<td>6.4<\/td>\n<td>3.2<\/td>\n<td>4.5<\/td>\n<td>1.5<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/div>\n<pre><code class=\"language-python\"># \u5220\u9664\u5168\u4e3aNaN\u503c\u7684\u884c\u6216\u5217\ndf.dropna(how=&#039;all&#039;)<\/code><\/pre>\n<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }<\/p>\n<p>    .dataframe tbody tr th {\n        
vertical-align: top;\n    }<\/p>\n<p>    .dataframe thead th {\n        text-align: right;\n    }\n<\/style>\n<table border=\"1\" class=\"dataframe\">\n<thead>\n<tr style=\"text-align: right;\">\n<th><\/th>\n<th>sepal length (cm)<\/th>\n<th>sepal width (cm)<\/th>\n<th>petal length (cm)<\/th>\n<th>petal width (cm)<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<th>0<\/th>\n<td>5.1<\/td>\n<td>NaN<\/td>\n<td>1.4<\/td>\n<td>0.2<\/td>\n<\/tr>\n<tr>\n<th>1<\/th>\n<td>4.9<\/td>\n<td>3.0<\/td>\n<td>1.4<\/td>\n<td>0.2<\/td>\n<\/tr>\n<tr>\n<th>2<\/th>\n<td>4.7<\/td>\n<td>3.2<\/td>\n<td>NaN<\/td>\n<td>0.2<\/td>\n<\/tr>\n<tr>\n<th>3<\/th>\n<td>7.0<\/td>\n<td>3.2<\/td>\n<td>4.7<\/td>\n<td>1.4<\/td>\n<\/tr>\n<tr>\n<th>4<\/th>\n<td>6.4<\/td>\n<td>3.2<\/td>\n<td>4.5<\/td>\n<td>1.5<\/td>\n<\/tr>\n<tr>\n<th>5<\/th>\n<td>6.9<\/td>\n<td>3.1<\/td>\n<td>4.9<\/td>\n<td>NaN<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/div>\n<pre><code class=\"language-python\"># \u5220\u9664\u975eNaN\u503c\u5c11\u4e8e4\u4e2a\u7684\u884c\ndf.dropna(thresh=4)<\/code><\/pre>\n<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }<\/p>\n<p>    .dataframe tbody tr th {\n        vertical-align: top;\n    }<\/p>\n<p>    .dataframe thead th {\n        text-align: right;\n    }\n<\/style>\n<table border=\"1\" class=\"dataframe\">\n<thead>\n<tr style=\"text-align: right;\">\n<th><\/th>\n<th>sepal length (cm)<\/th>\n<th>sepal width (cm)<\/th>\n<th>petal length (cm)<\/th>\n<th>petal width (cm)<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<th>1<\/th>\n<td>4.9<\/td>\n<td>3.0<\/td>\n<td>1.4<\/td>\n<td>0.2<\/td>\n<\/tr>\n<tr>\n<th>3<\/th>\n<td>7.0<\/td>\n<td>3.2<\/td>\n<td>4.7<\/td>\n<td>1.4<\/td>\n<\/tr>\n<tr>\n<th>4<\/th>\n<td>6.4<\/td>\n<td>3.2<\/td>\n<td>4.5<\/td>\n<td>1.5<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/div>\n<pre><code class=\"language-python\"># \u5220\u9664\u82b1\u843c\u957f\u5ea6\u4e2d\u6709NaN\u503c\u7684\u6570\u636e\ndf.dropna(subset=[&#039;sepal length 
(cm)&#039;])<\/code><\/pre>\n<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }<\/p>\n<p>    .dataframe tbody tr th {\n        vertical-align: top;\n    }<\/p>\n<p>    .dataframe thead th {\n        text-align: right;\n    }\n<\/style>\n<table border=\"1\" class=\"dataframe\">\n<thead>\n<tr style=\"text-align: right;\">\n<th><\/th>\n<th>sepal length (cm)<\/th>\n<th>sepal width (cm)<\/th>\n<th>petal length (cm)<\/th>\n<th>petal width (cm)<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<th>0<\/th>\n<td>5.1<\/td>\n<td>NaN<\/td>\n<td>1.4<\/td>\n<td>0.2<\/td>\n<\/tr>\n<tr>\n<th>1<\/th>\n<td>4.9<\/td>\n<td>3.0<\/td>\n<td>1.4<\/td>\n<td>0.2<\/td>\n<\/tr>\n<tr>\n<th>2<\/th>\n<td>4.7<\/td>\n<td>3.2<\/td>\n<td>NaN<\/td>\n<td>0.2<\/td>\n<\/tr>\n<tr>\n<th>3<\/th>\n<td>7.0<\/td>\n<td>3.2<\/td>\n<td>4.7<\/td>\n<td>1.4<\/td>\n<\/tr>\n<tr>\n<th>4<\/th>\n<td>6.4<\/td>\n<td>3.2<\/td>\n<td>4.5<\/td>\n<td>1.5<\/td>\n<\/tr>\n<tr>\n<th>5<\/th>\n<td>6.9<\/td>\n<td>3.1<\/td>\n<td>4.9<\/td>\n<td>NaN<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/div>\n<h3>1.1.2 \u586b\u5145\u7f3a\u5931\u503c<\/h3>\n<p><\/p><div id=\"rml_readmorelogin_placeholder\" style=\"position:relative;\"><div id=\"rml_fade_content\" style=\"position: absolute;\r\ntop:-10em;\r\nwidth:100%;\r\nheight:10em;\r\nbackground: -webkit-linear-gradient(rgba(255, 255, 255, 0) 0%,#ffffff 100%);\r\nbackground-image: -moz-linear-gradient(rgba(255, 255, 255, 0) 0%,#ffffff 100%);\r\nbackground-image: -o-linear-gradient(rgba(255, 255, 255, 0) 0%,#ffffff 100%);\r\nbackground-image: linear-gradient(rgba(255, 255, 255, 0) 0%,#ffffff 100%);\r\nbackground-image: -ms-linear-gradient(rgba(255, 255, 255, 0) 0%,#ffffff 100%);\"><\/div><div class=\"wpf-controller aru_rml_from_in_post\" style=\"background-color:#eeeeee;border:5px solid #cce6ff;\" id=\"ARU_ReadMoreLogin_ReadMoreLoginController\"><h2 id=\"Header\">\u67e5\u770b\u66f4\u591a<\/h2><div 
id=\"Message\"><p>\u8054\u7cfb\u7ba1\u7406\u5458\u5fae\u4fe1tutu19192010\uff0c\u6ce8\u518c\u8d26\u53f7<\/p>\n<\/div><div id=\"StatusBarHeader\"><\/div><form id=\"ARU_ReadMoreLogin_ReadMoreLoginController\"><input name=\"post_id\" value=\"3289\" type=\"hidden\"\/><input name=\"_init_callback\" value=\"InitLogin\" type=\"hidden\"\/><input name=\"post_id\" value=\"3289\" type=\"hidden\"\/><input name=\"rt_ype\" value=\"1\" type=\"hidden\"\/><input name=\"nonce\" value=\"65b7b8c5e8\" type=\"hidden\"\/><input name=\"_wpnonce\" value=\"c554ecb123\" type=\"hidden\"\/><input name=\"_controller\" value=\"ARU_ReadMoreLogin\\ReadMoreLoginController\" type=\"hidden\"\/><input name=\"_proxy_controller\" value=\"ARU_ReadMoreLogin\\ReadMoreLoginController\" type=\"hidden\"\/><input name=\"_view\" value=\"ARU_ReadMoreLogin\\ReadMoreLoginView\" type=\"hidden\"\/><table class=\"wpf-table-placeholder\"><tbody class=\"wpf-table-placeholder\"><tr class=\"wpf-table-placeholder\"><td class=\"wpf-table-placeholder-input\" width=\"400px\"><table class=\"wpf-table-placeholder\"><tbody class=\"wpf-table-placeholder\"><tr class=\"wpf-table-placeholder\"><th class=\"wpf-table-placeholder-input\"><label class=\"wpf-label\">Username:<\/label><\/th><\/tr><tr class=\"wpf-table-placeholder\"><td class=\"wpf-table-placeholder-input\"><input class=\"regular-text text_input\" name=\"username\" value=\"\" type=\"text\"\/><\/td><\/tr><tr class=\"wpf-table-placeholder\"><th class=\"wpf-table-placeholder-input\"><label class=\"wpf-label\">Password:<\/label><\/th><\/tr><tr class=\"wpf-table-placeholder\"><td class=\"wpf-table-placeholder-input\"><input class=\"regular-text text_input\" name=\"password\" value=\"\" type=\"password\"\/><\/td><\/tr><\/tbody><\/table><p class=\"wpf-table-placeholder submit\"><button class=\"wp_plugin_framework_ajax_button\" type=\"button\" style=\"background-color:#4D90FE;;color:#ffffff;;border:1px solid #3079ed;\" name=\"_event\" value=\"ButtonLogin\">Log 
in<\/button><\/p><\/td><td class=\"wpf-table-placeholder-input\"><\/td><\/tr><\/tbody><\/table><\/form><div id=\"ButtonStartRegister\"><a href=\"https:\/\/egonlin.com\/wp-login.php?action=register\">Register<\/a><\/div><div id=\"Link1\"><a href=\"https:\/\/egonlin.com\/wp-login.php?action=lostpassword\">Forgotten username or password?<\/a><\/div><div id=\"StatusBarFooter\"><\/div><\/div><\/div><div id=aru_remaining_content><\/div>","protected":false},"excerpt":{"rendered":"<p>\u7ec6\u5206\u6784\u5efa\u673a\u5668\u5b66\u4e60\u5e94\u7528\u7a0b\u5e8f\u7684\u6d41\u7a0b-\u6570\u636e\u9884\u5904\u7406 sklearn\u6570\u636e\u9884\u5904\u7406\u5b98\u65b9\u6587\u6863\u5730\u5740\uff1ahttps:\/\/scikit [&hellip;]<\/p>\n","protected":false},"author":3,"featured_media":3275,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":[],"categories":[276,301],"tags":[],"_links":{"self":[{"href":"https:\/\/egonlin.com\/index.php?rest_route=\/wp\/v2\/posts\/3289"}],"collection":[{"href":"https:\/\/egonlin.com\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/egonlin.com\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/egonlin.com\/index.php?rest_route=\/wp\/v2\/users\/3"}],"replies":[{"embeddable":true,"href":"https:\/\/egonlin.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=3289"}],"version-history":[{"count":0,"href":"https:\/\/egonlin.com\/index.php?rest_route=\/wp\/v2\/posts\/3289\/revisions"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/egonlin.com\/index.php?rest_route=\/wp\/v2\/media\/3275"}],"wp:attachment":[{"href":"https:\/\/egonlin.com\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=3289"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/egonlin.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=3289"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/egonlin.com\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=3289"}
],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}