I am working on an assignment and I ran into this error. I am using Python to perform KNN on a data set. I'm pretty sure I defined the variable, but it says otherwise. The code is written below.
`
import pandas as PD
import numpy as np
import matplotlib.pyplot as mtp
data_set= PD.read_csv('hw6.data.csv.gz')
x= data_set.iloc[:,[2,3]].valuesS
y= data_set.iloc[:, 4].values
from sklearn.model_selection import train_test_split
x_train, x_train, y_train, y_train= train_test_split(x,y, test_size=.25, random_state=0)
from sklearn.preprocessing import StandardScaler
st_x= StandardScaler()
x_train= st_x.fit_transform(x_train)
x_test= st_x.transform(x_test)
`
`
import pandas as PD
import numpy as np
import matplotlib.pyplot as mtp
data_set= PD.read_csv('hw6.data.csv.gz')
x= data_set.iloc[:,[2,3]].valuesS
y= data_set.iloc[:, 4].values
from sklearn.model_selection import train_test_split
x_train, x_train, y_train, y_train= train_test_split(x,y, test_size=.25, random_state=0)
from sklearn.preprocessing import StandardScaler
st_x= StandardScaler()
x_train= st_x.fit_transform(x_train)
x_test= st_x.transform(x_test)
`
The error says: "x_test" is not defined - Pylance (reportUndefinedVariable)
Related
I was trying to make a program to predict the runs made by a cricketer. I used a CSV file of data that I created myself. The code is:
import pandas as pd
from sklearn.linear_model import LinearRegression
import numpy as np
from sklearn.model_selection import train_test_split
#Data
data = pd.read_csv('Rohit Sharma.csv')
X = [['against','wickets','currentrun','weather','ball','over']]
Y = ['runsmade']
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size = 0.33, train_size=None, random_state=42)
reg = LinearRegression()
reg.fit(x_train,y_train)
a = reg.predict(x_test)
print(a)
print(data)
But it showed an error:
ValueError: With n_samples=1, test_size=0.33 and train_size=None, the resulting
train set will be empty. Adjust any of the aforementioned parameters
How to fix it?
Try this:
Looks like you made an error while selecting the columns of the data. See below.
import pandas as pd
from sklearn.linear_model import LinearRegression
import numpy as np
from sklearn.model_selection import train_test_split
#Data
data = pd.read_csv('Rohit Sharma.csv')
X = data[['against','wickets','currentrun','weather','ball','over']].to_numpy()
Y = data['runsmade'].to_numpy()
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size = 0.33, random_state=42)
reg = LinearRegression()
reg.fit(x_train,y_train)
a = reg.predict(x_test)
print(a)
print(data)
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
dataset=pd.read_csv('/content/train.csv')
data=pd.DataFrame(dataset)
index_names = data[ data['LotFrontage'] == "NA" ].index
data.drop(index_names, inplace = True)
X=data.iloc[0:200,[0,3,4,17,18]].values
Y=data.iloc[0:200,[80]].values
x_train, x_test, y_train, y_test= train_test_split(X,
Y, test_size= 0.25, random_state=0)
sc_x= StandardScaler()
x_train= sc_x.fit_transform(x_train)
x_test= sc_x.transform(x_test)
classifier= RandomForestClassifier(n_estimators= 200,criterion="entropy")
classifier.fit(x_train, y_train)
The call classifier.fit(x_train, y_train) is showing the error below
and for
np.isnan(x_train.values) //result : false
np.isinfinite(x_train.values) //result : false
Took me 10 seconds to find a Duplicate question...
Anyhow, RandomForestClassifier casts your data to float32; your original is probably float64, and therefore you get a size error as information is lost.
Float32s should do the job fine though, so you can easily cast your data to a float32:
# Fit the scaler on the training features only, then cast to float32, the
# precision RandomForestClassifier converts its input to internally.
x_train = sc_x.fit_transform(x_train).astype('float32')
# Do not pass the labels through sc_x: refitting it on y_train would replace
# the statistics learned from x_train (so a later sc_x.transform(x_test) no
# longer matches the features), and standardized labels are continuous values
# that a classifier cannot treat as classes. Only flatten the (n, 1) column.
y_train = y_train.ravel()
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
df = pd.read_csv('CarSeats_Dataset.csv')
df=df.dropna()
dummies=pd.get_dummies(df[['ShelveLoc', 'Urban', 'US']])
X = df.drop('Sales',axis=1)
y = np.log(df['Sales'])
X_train, X_test , y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)
regressor = DecisionTreeRegressor(random_state = 42)
regressor.fit(X_train, y_train)
I was trying to predict Sales, but when I tried to fit the regressor I got the error: `ValueError: could not convert string to float: 'Bad'`
I am a beginner in this and I do not know how to fix it. Can anyone help me with that please?
import pandas as pd
# Prices arrive as strings; build the frame and convert that one column to
# float so it can be used numerically.
Data = {
    'Product': ['ABC', 'XYZ'],
    'Price': ['250', '270'],
}
df = pd.DataFrame(Data).astype({'Price': float})
# Show the converted frame, then the per-column dtypes to confirm the cast.
for report in (df, df.dtypes):
    print(report)
I want to plot a confusion matrix to visualize the classifier's performance, but its accuracy and recall do not show.
Accuracy Screenshot
I don't see any data here, or any code either. Anyway, this works for me.
from sklearn.metrics import classification_report
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, GridSearchCV
# Synthetic 10-class problem so the example is self-contained.
X, y = make_classification(
    n_samples=1000,
    n_features=30,
    n_informative=12,
    n_clusters_per_class=1,
    n_classes=10,
    class_sep=2.0,
    random_state=42,
)
# Stratify so every class keeps the same proportion in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y)
clf = LogisticRegression(max_iter=1000, random_state=42).fit(X_train, y_train)
# classification_report expects (y_true, y_pred). Passing the predictions
# first swaps precision with recall and counts support on the predicted
# labels instead of the true ones.
df = pd.DataFrame(classification_report(y_test,
                                        clf.predict(X_test),
                                        digits=2,
                                        output_dict=True)).T
# Support is a sample count; show it as an integer.
df['support'] = df.support.apply(int)
# Shade the per-class rows ('0'..'9') for the columns up to 'f1-score'.
df.style.background_gradient(cmap='viridis',subset=pd.IndexSlice['0':'9', :'f1-score'])
import seaborn as sns
sns.heatmap(df, annot=True)
I am trying to train a Linear Regression model to continue a graph.
I have a couple of thousand lines of data in my CSV file that I import into NumPy arrays. Here is my code:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import csv
import math
from sklearn import preprocessing, svm
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
def predict():
sample_data = pd.read_csv("includes\\csv.csv")
x = np.array(sample_data["day"])
y = np.array(sample_data["balance"])
for x in x:
x = x.reshape(1, -1)
#lol
for y in y:
y.reshape(1, -1)
#lol
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
clf = LinearRegression()
clf.fit(x_train, y_train)
clf.score(x_test, y_test)
When I run this, the error is:
TypeError: Singleton array 6014651 cannot be considered a valid collection.
Any ideas why that's a thing?
After discussion in comments:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import csv
import math
from sklearn import preprocessing, svm
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
def predict():
    """Fit a linear regression of ``balance`` on ``day`` and score it.

    Reads ``includes\\csv.csv``, holds out 20% of the rows as a test set,
    fits ``LinearRegression`` on the rest and returns the value of
    ``clf.score`` on the held-out rows. The split is not seeded, so the
    result varies between calls.
    """
    sample_data = pd.read_csv("includes\\csv.csv")
    # scikit-learn expects 2-D inputs: one row per sample, one column per
    # feature. Reshape the whole array once instead of element by element.
    x = np.array(sample_data["day"]).reshape(-1, 1)
    y = np.array(sample_data["balance"]).reshape(-1, 1)
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
    clf = LinearRegression()
    clf.fit(X_train, y_train)
    # Return the score so the caller can use it instead of discarding it.
    return clf.score(X_test, y_test)
X_train and X_test should be capitalized; Python variable names are case-sensitive.