02-机器学习基础: 监督学习——决策树
决策树:可解释的规则学习一、决策树要解决什么问题?1.1 从人类决策到机器学习importnumpyasnpimportmatplotlib.pyplotaspltfromsklearn.datasetsimportmake_classificationfromsklearn.model_selectionimporttrain_test_splitfromsklearn.treeimportDecisionTreeClassifier,plot_treefromsklearn.metricsimportaccuracy_scoreimportwarnings warnings.filterwarnings('ignore')print("="*60)print("决策树:模仿人类决策过程的算法")print("="*60)# 人类决策示例fig,ax=plt.subplots(figsize=(12,8))ax.axis('off')# 绘制决策树结构defdraw_tree(ax):# 根节点root=plt.Rectangle((0.35,0.7),0.3,0.1,facecolor='lightblue',ec='black')ax.add_patch(root)ax.text(0.5,0.75,'天气如何?',ha='center',va='center',fontsize=11)# 左分支ax.annotate('',xy=(0.25,0.6),xytext=(0.4,0.7),arrowprops=dict(arrowstyle='-',lw=2))left_node=plt.Rectangle((0.1,0.5),0.3,0.1,facecolor='lightgreen',ec='black')ax.add_patch(left_node)ax.text(0.25,0.55,'晴天',ha='center',va='center',fontsize=10)# 右分支ax.annotate('',xy=(0.75,0.6),xytext=(0.6,0.7),arrowprops=dict(arrowstyle='-',lw=2))right_node=plt.Rectangle((0.6,0.5),0.3,0.1,facecolor='lightgreen',ec='black')ax.add_patch(right_node)ax.text(0.75,0.55,'雨天',ha='center',va='center',fontsize=10)# 叶子节点ax.annotate('',xy=(0.15,0.4),xytext=(0.2,0.5),arrowprops=dict(arrowstyle='-',lw=1))leaf1=plt.Rectangle((0.05,0.3),0.2,0.1,facecolor='lightyellow',ec='black')ax.add_patch(leaf1)ax.text(0.15,0.35,'去打球',ha='center',va='center',fontsize=9)ax.annotate('',xy=(0.35,0.4),xytext=(0.3,0.5),arrowprops=dict(arrowstyle='-',lw=1))leaf2=plt.Rectangle((0.25,0.3),0.2,0.1,facecolor='lightyellow',ec='black')ax.add_patch(leaf2)ax.text(0.35,0.35,'待家里',ha='center',va='center',fontsize=9)ax.annotate('',xy=(0.65,0.4),xytext=(0.7,0.5),arrowprops=dict(arrowstyle='-',lw=1))leaf3=plt.Rectangle((0.55,0.3),0.2,0.1,facecolor='lightyellow',ec='black')ax.add_patch(leaf3)ax.text(0.65,0.35,'去打球',ha='center',va='center',fontsize=9)ax.annotate('',xy=(0.85,0.4),xytext=(0.8,0.5),arrowprops=dict(arrowstyle='-',lw=1))leaf4=plt.Rectangle((0.75,0.3),0.2,0.1,facecolor='lightyellow',ec='black')ax.add_patch(leaf4)ax.text(0.85,0.35,'看书',ha='center',va='center',fontsize=9)draw_tree(ax)ax.set_xlim(0,1)ax.set_ylim(0,1)ax.set_title('人类决策过程:一系列if-else规则',fontsize=14)plt.tight_layout()plt.show()print("\n💡 决策树的核心思想:")print(" 将复杂的决策问题分解为一系列简单的判断")print(" 每个节点问一个问题,根据答案走向不同分支")print(" 最终到达叶子节点,得到决策结果")二、决策树的构建原理2.1 如何选择最佳分裂特征?defexplain_split_criteria():"""解释分裂标准:信息增益和基尼系数"""fig,axes=plt.subplots(2,2,figsize=(14,10))# 1. 熵的概念ax1=axes[0,0]p=np.linspace(0.01