'DataFrame' object has no attribute 'flush' - python

I'm trying to solve the Boston house price prediction problem, but I get this error:
AttributeError: 'DataFrame' object has no attribute 'flush'
and this:
`
Cell In [53], line 7, in load_data()
5 def load_data():
6 datafile= pd.read_csv("housing.csv",sep=',')
----> 7 data = np.fromfile(datafile)
8 feature_names = ['RM', 'LSTAT', 'PTRATIO', 'MEDV']
9 feature_num = len(feature_names)
`
here's a part of my code
`
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
def load_data():
datafile= pd.read_csv("housing.csv",sep=',')
data = np.fromfile(datafile)
feature_names = ['RM', 'LSTAT', 'PTRATIO', 'MEDV']
feature_num = len(feature_names)
data = data.reshape(data.shape[0] // feature_num, feature_num)
ratio = 0.8
offset = int(data.shape[0] * ratio)
training = data[:offset]
maximums, minimums, avge = training.max(axis=0), training.min(axis=0), training.sum(axis=0) / training.shape[0]
`
The word "flush" doesn't appear anywhere in my code or in my data.
Can anyone give me some idea of what is going wrong?

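You are reading the housing.csv file with pd.read_csv, which converts it to a DataFrame object. This leads to the error, because np.fromfile expects a file (an open file object, or a str or path), not a DataFrame. NumPy treats any argument that is not a path as an open file object and tries to call its flush() method, which is why the error message mentions 'flush' even though that word never appears in your code.
To get rid of the error, replace the first two statements in the load_data function with a single suitable NumPy function such as np.genfromtxt.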
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
def load_data():
data = np.genfromtxt('housing.csv', delimiter=',')
feature_names = ['RM', 'LSTAT', 'PTRATIO', 'MEDV']
# [...]

Related

Encoding target column

I'm constructing an ANN in Python and I'm having trouble encoding column[-1] (y) into binary numbers.
There are 6 different parameters in this column and I want to encode each one into a separate column, as is done for the columns of X with OneHotEncoder.
Thanks
Ido
dataframe_screenshot
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder,StandardScaler,MinMaxScaler
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.linear_model import LinearRegression,Lasso,Ridge
from sklearn.metrics import confusion_matrix,accuracy_score,mean_squared_error,r2_score
import tensorflow as tf
from tensorflow import keras
Political_opinions = pd.read_csv("data.csv")
Political_opinions.drop(columns=['Timestamp','Yas','Bolge','Egitim'],axis=1,inplace=True)
print(Political_opinions)
one_hot_color = pd.get_dummies(Political_opinions.parti).values
print(Political_opinions.head(10))
Political_opinions["Cinsiyet"] = (Political_opinions["Cinsiyet"]=="Erkek").astype(int)
Political_opinions["soru1"] = (Political_opinions["soru1"]=="Hayır").astype(int)
Political_opinions["soru2"] = (Political_opinions["soru2"]=="Hayır").astype(int)
Political_opinions["soru3"] = (Political_opinions["soru3"]=="Hayır").astype(int)
Political_opinions["soru4"] = (Political_opinions["soru4"]=="Hayır").astype(int)
Political_opinions["soru5"] = (Political_opinions["soru5"]=="Hayır").astype(int)
Political_opinions["soru6"] = (Political_opinions["soru6"]=="Hayır").astype(int)
Political_opinions["soru7"] = (Political_opinions["soru7"]=="Hayır").astype(int)
Political_opinions["soru8"] = (Political_opinions["soru8"]=="Hayır").astype(int)
Political_opinions["soru9"] = (Political_opinions["soru9"]=="Hayır").astype(int)
Political_opinions["soru10"] = (Political_opinions["soru10"]=="Hayır").astype(int)
Political_opinions["parti"] = (Political_opinions["parti"]=="AKP").astype(int)
Political_opinions["parti"] = (Political_opinions["parti"]=="MHP").astype(int)
Political_opinions["parti"] = (Political_opinions["parti"]=="CHP").astype(int)
Political_opinions["parti"] = (Political_opinions["parti"]=="DIĞER").astype(int)
Political_opinions["parti"] = (Political_opinions["parti"]=="HDP").astype(int)

creating a dataset function using scikit-learn

I am pretty new to Python, and I am trying to load a dataset from my computer using scikit-learn. This is what my code looks like:
**whatever.py**
import numpy as np
import csv
from sklearn.datasets.base import Bunch
class Cortex_nuc:
def cortex_nuclear():
with open('C:/Users/User/Desktop/Data_Cortex_Nuclear4.csv') as csv_file:
data_file = csv.reader(csv_file)
temp = next(data_file)
n_samples = int(float(temp[0]))
n_features = int(float(temp[1]))
data = np.empty((n_samples, n_features))
target = np.empty((n_samples,), dtype=np.float64)
for i, sample in enumerate(data_file):
data[i] = np.asarray(sample[:-1], dtype=np.float64)
target[i] = np.asarray(sample[-1], dtype=np.float64)
return Bunch(data=data, target=target)
so then I import it into my project:
from whatever import Cortex_nuc
and after that I try to save it into df:
df = Cortex_nuc.cortex_nuclear()
Btw, this is what the dataset looks like:
this is just a part of the dataset, otherwise it has 77 columns and about a thousand rows.
But I get an error message and I can't seem to figure out why it's happening. Here's the error message:
IndexError Traceback (most recent call last)
<ipython-input-5-a4935f2c187f> in <module>
----> 1 df = Cortex_nuc.cortex_nuclear()
~\whatever.py in cortex_nuclear()
20
21 for i, sample in enumerate(data_file):
---> 22 data[i] = np.asarray(sample[:-1], dtype=np.float64)
23 target[i] = np.asarray(sample[-1], dtype=np.float64)
24
IndexError: index 0 is out of bounds for axis 0 with size 0
Can someone please help me? Thanks!
If you want to create a "sklearn-like" dataset in a Bunch object, you probably want something like this:
import pandas as pd
import numpy as np
from sklearn.utils import Bunch
# For reproducing
from io import StringIO
csv_file = StringIO("""
target,A,B
0,0,0
1,0,1
1,1,0
0,1,1
""")
def load_xor(*, return_X_y=False):
"""Describe your data here."""
_data_file = pd.read_csv(csv_file)
_data = Bunch()
_data["DESCR"] = load_xor.__doc__
_data["data"] = _data_file[["A", "B"]].to_numpy(dtype=np.float64)
_data["target"] = _data_file["target"].to_numpy(dtype=np.float64)
_data["target_names"] = np.array(["false", "true"])
_data["feature_names"] = np.array(list(_data_file.drop(["target"], axis=1)))
if return_X_y:
return _data.data, _data.target
return _data
if __name__ == "__main__":
# Return and unpack the `X`, `y` tuple
X, y = load_xor(return_X_y=True)
print(X, y)
This is because the loaders in sklearn.datasets typically return Bunch objects with specific attributes/keys (for explanations, see the "Returns" section of the load_iris documentation):
>>> from sklearn.datasets import load_iris
>>> data = load_iris()
>>> dir(data)
['DESCR', 'data', 'feature_names', 'filename', 'frame', 'target', 'target_names']

ValueError: could not convert string to float in univariate_selection

Hello, I'm using the univariate selection method to select the best features from the following data set:
https://i.stack.imgur.com/J31T0.png
But I got an error:
ValueError: could not convert string to float: 'SUDMyYggegA'
Below is my code:
import pandas as pd
import numpy as np
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
data = pd.read_csv("C://Users/Shahnawaz Irfan/Desktop/demo.csv")
X = data.iloc[:,0:15]
y = data.iloc[:,-13]
bestfeatures = SelectKBest(score_func=chi2, k=10)
fit = bestfeatures.fit(X,y)
dfscores = pd.DataFrame(fit.scores_)
dfcolumns = pd.DataFrame(X.columns)
featureScores = pd.concat([dfcolumns,dfscores],axis=1)
featureScores.columns = ['features','Score']
print(featureScores.nlargest(15,'Score'))

How to fix these elements?

Getting an error with some elements
import pandas as pd
import numpy as np
from scipy.signal import argrelextrema
import matplotlib.pyplot as plt
import datetime
#Import our historical data
data = pd.read_csv('Data/sample.csv')
data.columns = [['Date','open','high','low','close','vol']]
data = data.drop_duplicates(keep=False)
data.Date = pd.to_datetime(data.Date,format='%Y.%m.%d %H:%M:%S.%f')
data = data.set_index(data.Date)
data = data[['open', 'high', 'close', 'vol']]
price = data.close.iloc[:100]
# Find our relative extrema
max_idx = argrelextrema(price.values,np.greater,order=1)
min_idx = argrelextrema(price.values,np.less,order=1)
print(max_idx)
print(min_idx)
The error is
Traceback (most recent call last):
File "untitled.py", line 9, in <module>
data.columns = [['Date','open','high','low','close','vol']]
ValueError: Length mismatch: Expected axis has 1 elements, new values have 6 elements
You want to pass a flat list, not a list of lists; otherwise pandas will interpret the nested list as one column name.
data.columns = ['Date','open','high','low','close','vol']
Edit 1
Your CSV file seems to be tab-separated (\t) rather than comma-separated:
data = pd.read_csv('Data/sample.csv', sep=r'\t')
data.columns = ['Date','open','high','low','close','vol']

how can i fix the 'syntax error' in python using jupyter notebook?

Hi, I tried to fix the error but I could not, and I don't know where I'm going wrong. Can anyone please help? Below is my code.
My previous error was an indentation error.
import pandas as pd
import numpy as np
import xgboost as xgb
import sklearn as s
import matplotlib
import tensorflow as tf
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from IPython.display import display
df = pd.read_csv("C:/Users/patel/Desktop/tap.csv")
from IPython.display import display
X_all = df.drop(['FTR'],1)
y_all = df['FTR']
# Standardising the data.
from sklearn.preprocessing import scale
#Center to the mean and component wise scale to unit variance.
cols = [['FTHG','FTAG','HTHG','HTAG']]
for col in cols:
X_all[col] = scale(X_all[col])
X_all.HM1 = X_all.HM1.astype('str')
X_all.HM2 = X_all.HM2.astype('str')
X_all.HM3 = X_all.HM3.astype('str')
X_all.AM1 = X_all.AM1.astype('str')
X_all.AM2 = X_all.AM2.astype('str')
X_all.AM3 = X_all.AM3.astype('str')
def preprocess_features(X):
output = pd.DataFrame(index = X.index)
for col, col_df in X.iteritems():
if col_df.dtype == object:
col_df = pd.get_dummies(col_df, prefix = col)
output = output.join(col_df)
return output
X_all = preprocess_features(X_all)
print "Processed feature columns ({} total features):\n{}".format(len(X_all.columns), list(X_all.columns))
print "\nFeature values:"
display (X_all)
File "", line 39
print "Processed feature columns ({} total features):\n{}".format(len(X_all.columns), list(X_all.columns))
^
SyntaxError: invalid syntax
If you are using Python 3, print is a function rather than a statement, so its arguments must be wrapped in parentheses, which are missing here. The following code should work:
print("Processed feature columns ({} total features):\n{}".format(len(X_all.columns), list(X_all.columns)))

Categories

Resources