Commit 686dea2f authored by christian.foerster's avatar christian.foerster

update

parent 9c53451a
......@@ -34,7 +34,7 @@
"\n",
"These columns must be in the dataframe:\n",
"\n",
"sepal_length | sepal_width | petal_length | petal_width | target_name\n",
"sepal_length | sepal_width | petal_length | petal_width | species\n",
"\n"
]
},
......
This diff is collapsed.
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Pandas, (Numpy), Statsmodels and Plotting\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#####################################################\n",
"## YOUR DATA\n",
"from sklearn import datasets\n",
"iris=datasets.load_iris()\n",
"\n",
"iris_data=iris.data\n",
"iris_header=iris.feature_names\n",
"iris_group=iris.target\n",
"iris_target_names=iris.target_names\n",
"####################################################"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**1. Convert data to a Pandas dataframe!**\n",
"\n",
"These columns must be in the dataframe:\n",
"\n",
"sepal_length | sepal_width | petal_length | petal_width | species\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"\n",
"df = pd.DataFrame(iris_data, columns=iris_header)\n",
"df.columns = [\"sepal_length\", \"sepal_width\", \"petal_length\", \"petal_width\"]\n",
"df[\"species\"] = iris_group\n",
"for i in range(3):\n",
" df.loc[df.species == i, \"species\"] = iris_target_names[i]\n",
" \n",
"df.head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**2. Plot the data to get a better feel for it. (A scatter matrix would be a good idea.)**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"from pandas.plotting import scatter_matrix\n",
"\n",
"# to get a nice plot we're gonna use some colors\n",
"species_to_color = { 'setosa': '#377eb8',\n",
" 'versicolor': '#4eae4b',\n",
" 'virginica': '#e41a1c'}\n",
"\n",
"colors = [species_to_color[s] for s in df.species]\n",
"\n",
"scatter_matrix(df,c=colors, figsize=(16,16))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**3. Now plot the data _grouped_ by species.**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.groupby(\"species\").plot()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**4. Create a multiple linear regression model that predicts petal_width from petal_length, sepal_width and sepal_length, and check how well it fits!**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import statsmodels.formula.api as smf\n",
"\n",
"linear = smf.ols(formula='petal_width ~ petal_length + sepal_width + sepal_length', data=df)\n",
"result_linear = linear.fit()\n",
"print(result_linear.summary())\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**5. Create a NumPy array containing only the sepal and petal values of the setosa rows!**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"array=df.loc[df.species == \"setosa\", df.columns[:-1]].values\n",
"array"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.species.unique()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"@webio": {
"lastCommId": null,
"lastKernelId": null
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Pandas, (Numpy), Statsmodels and Plotting\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#####################################################\n",
"## YOUR DATA\n",
"from sklearn import datasets\n",
"iris=datasets.load_iris()\n",
"\n",
"iris_data=iris.data\n",
"iris_header=iris.feature_names\n",
"iris_group=iris.target\n",
"iris_target_names=iris.target_names\n",
"####################################################"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**1. Convert data to a Pandas dataframe!**\n",
"\n",
"These columns must be in the dataframe:\n",
"\n",
"sepal_length | sepal_width | petal_length | petal_width | species\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"\n",
"df = pd.DataFrame(iris_data, columns=iris_header)\n",
"df.columns = [\"sepal_length\", \"sepal_width\", \"petal_length\", \"petal_width\"]\n",
"df[\"species\"] = iris_group\n",
"for i in range(3):\n",
" df.loc[df.species == i, \"species\"] = iris_target_names[i]\n",
" \n",
"df.head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**2. Plot the data to get a better feel for it. (A scatter matrix would be a good idea.)**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"from pandas.plotting import scatter_matrix\n",
"\n",
"# to get a nice plot we're gonna use some colors\n",
"species_to_color = { 'setosa': '#377eb8',\n",
" 'versicolor': '#4eae4b',\n",
" 'virginica': '#e41a1c'}\n",
"\n",
"colors = [species_to_color[s] for s in df.species]\n",
"\n",
"scatter_matrix(df,c=colors, figsize=(16,16))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**3. Now plot the data _grouped_ by species.**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.groupby(\"species\").plot()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**4. Create a multiple linear regression model that predicts petal_width from petal_length, sepal_width and sepal_length, and check how well it fits!**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import statsmodels.formula.api as smf\n",
"\n",
"linear = smf.ols(formula='petal_width ~ petal_length + sepal_width + sepal_length', data=df)\n",
"result_linear = linear.fit()\n",
"print(result_linear.summary())\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**5. Create a NumPy array containing only the sepal and petal values of the setosa rows!**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"array=df.loc[df.species == \"setosa\", df.columns[:-1]].values\n",
"array"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.species.unique()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"@webio": {
"lastCommId": null,
"lastKernelId": null
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment