# Prepare X_train, X_test, y_train, y_test
import pandas as pd
from sklearn.model_selection import train_test_split
# Load the cities dataset from the project's data directory.
df = pd.read_csv("data/canada_usa_cities.csv")

# Separate the target from the features. `DataFrame.pop` removes the
# "country" column from `df` in place and returns it, so after this call
# `df` holds only the remaining (feature) columns.
y = df.pop("country")
X = df

# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

# Preview the first rows of the training split with features and target
# side by side (axis=1 concatenates column-wise).
pd.concat([X_train, y_train], axis=1).head()
 | longitude | latitude | country |
---|---|---|---|
160 | -76.4813 | 44.2307 | Canada |
127 | -81.2496 | 42.9837 | Canada |
169 | -66.0580 | 45.2788 | Canada |
188 | -73.2533 | 45.3057 | Canada |
187 | -67.9245 | 47.1652 | Canada |