How to avoid Collection Error Python Numpy - python

I am trying to train a Linear Regression Qualifier to continue a graph.
I have a couple of thousand lines of data in my csv file that I import into numpy arrays. Here is my code :
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import csv
import math
from sklearn import preprocessing, svm
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
def predict():
# Load the "day" and "balance" columns from the CSV, then try to split them
# and fit/score a linear regression.
sample_data = pd.read_csv("includes\\csv.csv")
x = np.array(sample_data["day"])
y = np.array(sample_data["balance"])
# NOTE(review): `for x in x` rebinds the name x on every iteration, so after
# the loop x refers to a single reshaped element, not the full "day" array.
for x in x:
x = x.reshape(1, -1)
#lol
# NOTE(review): same rebinding for y, and the reshape result is discarded, so
# y is left as the last single element of "balance" when the loop ends.
for y in y:
y.reshape(1, -1)
#lol
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
clf = LinearRegression()
# NOTE(review): the split above assigns X_train / X_test (capital X), but the
# two calls below read x_train / x_test (lowercase), which are never assigned.
clf.fit(x_train, y_train)
clf.score(x_test, y_test)
When I run this, the error is:
TypeError: Singleton array 6014651 cannot be considered a valid collection.
Any ideas why that's a thing?

After discussion in comments:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import csv
import math
from sklearn import preprocessing, svm
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
def predict():
    """Fit a linear regression of ``balance`` on ``day`` and return its test score.

    Reads the ``day`` and ``balance`` columns from the CSV file, holds out
    20% of the rows for testing, fits ``LinearRegression`` on the remaining
    rows and evaluates it on the held-out rows.

    Returns:
        The value returned by ``LinearRegression.score`` on the test split
        (the coefficient of determination, R^2).
    """
    sample_data = pd.read_csv("includes\\csv.csv")
    # reshape(-1, 1) turns each 1-D column into a 2-D array with one row per
    # sample and a single column, which is the layout the estimator expects.
    x = np.array(sample_data["day"]).reshape(-1, 1)
    y = np.array(sample_data["balance"]).reshape(-1, 1)
    # Names are case-sensitive: the capitalised X_train / X_test assigned here
    # must be the same names used in fit() and score() below.
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
    clf = LinearRegression()
    clf.fit(X_train, y_train)
    # Return the score instead of discarding it so callers can actually use it.
    return clf.score(X_test, y_test)

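X_train and X_test should be capitalized; Python variable names are case-sensitive. The per-element `for` loops are also replaced with a single `reshape(-1, 1)` on each array: looping with `for x in x:` rebinds the name to one element, which is what produces the "Singleton array" error.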

Related

KNN: why is my variable not defined in Python?

I am working on an assignment and I ran into this error. I am using Python to perform KNN on a data set. I'm pretty sure I defined the variable, but it says otherwise. The code is written below.
`
import pandas as PD
import numpy as np
import matplotlib.pyplot as mtp
# Load the data set with pandas.
data_set= PD.read_csv('hw6.data.csv.gz')
# Features are taken from columns 2 and 3 and the target from column 4 (by position).
# NOTE(review): `.valuesS` looks like a typo for `.values` (as used on the next line) -- confirm.
x= data_set.iloc[:,[2,3]].valuesS
y= data_set.iloc[:, 4].values
from sklearn.model_selection import train_test_split
# NOTE(review): x_train and y_train each appear twice in the unpacking target, so
# x_test and y_test are never assigned; x_test is therefore undefined when used below.
x_train, x_train, y_train, y_train= train_test_split(x,y, test_size=.25, random_state=0)
from sklearn.preprocessing import StandardScaler
# Fit the scaler on the training features, then apply the same scaler to the test features.
st_x= StandardScaler()
x_train= st_x.fit_transform(x_train)
x_test= st_x.transform(x_test)
`
`
import pandas as PD
import numpy as np
import matplotlib.pyplot as mtp
# Load the data set with pandas.
data_set= PD.read_csv('hw6.data.csv.gz')
# Features are taken from columns 2 and 3 and the target from column 4 (by position).
# NOTE(review): `.valuesS` looks like a typo for `.values` (as used on the next line) -- confirm.
x= data_set.iloc[:,[2,3]].valuesS
y= data_set.iloc[:, 4].values
from sklearn.model_selection import train_test_split
# NOTE(review): x_train and y_train each appear twice in the unpacking target, so
# x_test and y_test are never assigned; x_test is therefore undefined when used below.
x_train, x_train, y_train, y_train= train_test_split(x,y, test_size=.25, random_state=0)
from sklearn.preprocessing import StandardScaler
# Fit the scaler on the training features, then apply the same scaler to the test features.
st_x= StandardScaler()
x_train= st_x.fit_transform(x_train)
x_test= st_x.transform(x_test)
`
The error says: "x_test" is not defined Pylance (reportUndefinedVariable)

train_test_data_split function is showing problem

I was trying to make a program to predict the runs made by a cricketer. I used a csv file for data made by me. The code is:
import pandas as pd
from sklearn.linear_model import LinearRegression
import numpy as np
from sklearn.model_selection import train_test_split
#Data
data = pd.read_csv('Rohit Sharma.csv')
# NOTE(review): X and Y are literal lists of column-name strings, not columns read
# from `data`. X contains a single inner list, so train_test_split sees one sample,
# which matches the n_samples=1 in the reported error.
X = [['against','wickets','currentrun','weather','ball','over']]
Y = ['runsmade']
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size = 0.33, train_size=None, random_state=42)
reg = LinearRegression()
reg.fit(x_train,y_train)
a = reg.predict(x_test)
print(a)
print(data)
But it showed an error:
ValueError: With n_samples=1, test_size=0.33 and train_size=None, the resulting
train set will be empty. Adjust any of the aforementioned parameters
How to fix it?
Try this:
Looks like you made an error while selecting the columns of the data. See below.
import pandas as pd
from sklearn.linear_model import LinearRegression
import numpy as np
from sklearn.model_selection import train_test_split
#Data
data = pd.read_csv('Rohit Sharma.csv')
# Index the DataFrame with the column names so X and Y hold the actual column
# values (converted to NumPy arrays) rather than the name strings themselves.
X = data[['against','wickets','currentrun','weather','ball','over']].to_numpy()
Y = data['runsmade'].to_numpy()
# Hold out 33% of the rows as the test set.
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size = 0.33, random_state=42)
reg = LinearRegression()
reg.fit(x_train,y_train)
# Predict on the held-out features; print the predictions, then the full DataFrame.
a = reg.predict(x_test)
print(a)
print(data)

confusion matrix report accuracy problem jupyter

I want to plot a confusion matrix to visualize the classifier's performance, but its accuracy and recall do not show.
Accuracy Screenshot
I don't see any data here, or any code either. Anyway, this works for me.
from sklearn.metrics import classification_report
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, GridSearchCV
# Build a synthetic 10-class data set so the example needs no external data.
X, y = make_classification(
    n_samples=1000,
    n_features=30,
    n_informative=12,
    n_clusters_per_class=1,
    n_classes=10,
    class_sep=2.0,
    random_state=42,
)
# Stratify on y so every class is represented in both the train and test splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y)
clf = LogisticRegression(max_iter=1000, random_state=42).fit(X_train, y_train)
# classification_report takes (y_true, y_pred) in that order. Passing the
# predictions first swaps precision with recall and reports support for the
# predicted labels instead of the true ones.
df = pd.DataFrame(classification_report(y_test,
                                        clf.predict(X_test),
                                        digits=2,
                                        output_dict=True)).T
# Show support as whole numbers rather than floats.
df['support'] = df.support.apply(int)
# Colour only the per-class rows ('0'..'9') and the metric columns up to 'f1-score'.
df.style.background_gradient(cmap='viridis', subset=pd.IndexSlice['0':'9', :'f1-score'])
import seaborn as sns
# Plot the whole report table as an annotated heatmap.
sns.heatmap(df, annot=True)

How to fix error with train_test_split in Python?

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
x=np.array([0.1,0.2,0.7,8.0,45.0,56.0,66.0,0.7,0.6,64.0])
y=np.array([0,0,0,1,1,1,1,0,0,1])
# NOTE(review): reshape((1, -1)) produces a single row holding all ten values,
# i.e. one sample, which matches the n_samples=1 in the reported error.
x = np.array(x).reshape((1, -1))
y = np.array(y).reshape((1, -1))
x_train, x_test, y_train, y_test = train_test_split(x, y,test_size=0.4, train_size=0.5, random_state=7, stratify=y)
knn = KNeighborsClassifier()
# NOTE(review): fit is called with (y_train, x_train) while predict below is given
# x_train / x_test -- the argument order here looks swapped; confirm.
knn.fit(y_train, x_train)
y_train_predict = knn.predict(x_train)
y_test_predict = knn.predict(x_test)
print(y_train_predict)
print(y_test_predict)
Error:
With n_samples=1, test_size=0.4 and train_size=0.5, the resulting train set will be empty. Adjust any of the aforementioned parameters.
Try:
# reshape(-1, 1) yields one row per value with a single column; the -1 tells
# NumPy to infer the number of rows from the array's length.
x = np.array(x).reshape(-1, 1)
y = np.array(y).reshape(-1, 1)

ValueError: Expected 2D array, got scalar array instead: array=11

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
dataset = pd.read_csv('C:/Users/Dell/Desktop/Salary.csv')
# X: every column except the last, kept as a 2-D array; y: the column at position 1.
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 1].values
from sklearn.model_selection import train_test_split
# Hold out one third of the rows for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1/3,
random_state=0)
from sklearn.linear_model import LinearRegression
simplelinearRegresson = LinearRegression()
simplelinearRegresson.fit(X_train, y_train)
# predict is given the 2-D X_test array here.
y_predict = simplelinearRegresson.predict(X_test)
Below line has error:
y_predict_val = simplelinearRegresson.predict(11)
You need to convert your scalar to a 2D array with shape (number of samples, number of features).
y_predict_val = simplelinearRegresson.predict([[11]])
This is what the predict method expects. See docs for more info.

Categories

Resources