Pyspark-object has no attribute 'spark' - python

from unittest import TestCase
from pyspark.sql import SparkSession
from lib.utils import load_survey_df, count_by_country
class UtilsTestCase(TestCase):
#classmethod
def create_testing_pyspark_session(cls):
return SparkSession.builder.master('local[2]').appName('my - local - testing - pyspark - context').getOrCreate()
#classmethod
def SetUpClass(cls) -> None:
cls.spark = cls.create_testing_pyspark_session()
def test_datafile_loading(self):
sample_df = load_survey_df(self.spark, "data/sample.csv")
result_count = sample_df.count()
self.assertEqual(result_count, 9, "Record count should be 9")
def test_country_count(self):
sample_df = load_survey_df(self.spark, "data/sample.csv")
count_list = count_by_country(sample_df).collect()
count_dict = dict()
for row in count_list:
count_dict[row["Country"]] = row["count"]
self.assertEqual(count_dict["United States"], 4, "Count for United States should be 4")
self.assertEqual(count_dict["Canada"], 2, "Count for United States should be 2")
self.assertEqual(count_dict["United Kingdom"], 1, "Count for United States should be 1")
Hi All,
Can you please tell me what's wrong with this code? I am getting below error:
Error
Traceback (most recent call last):
File "C:\Users\abc\AppData\Local\Continuum\anaconda3\lib\unittest\case.py", line 59, in testPartExecutor
yield
File "C:\Users\abc\AppData\Local\Continuum\anaconda3\lib\unittest\case.py", line 628, in run
testMethod()
File "C:\Users\abc\PycharmProjects\HelloSpark\lib\test_utils.py", line 17, in test_datafile_loading
sample_df = load_survey_df(self.spark, "data/sample.csv")
AttributeError: 'UtilsTestCase' object has no attribute 'spark'

I see that within your SetUpClass method you are using cls.spark,
you need to declare is as attribute in class UtilsTestCase.
just adding spark after your class to your code as follows should fix it:
class UtilsTestCase(TestCase):
spark
#classmethod
def create_testing_pyspark_session(cls):
return SparkSession.builder.master('local[2]').appName('my - local - testing - pyspark - context').getOrCreate()
#classmethod
def SetUpClass(cls) -> None:
cls.spark = cls.create_testing_pyspark_session()

Related

raise AttributeError(name) AttributeError: LCC_GetChannelHandle

I am very new in python cffi. I have to access my temprature module by using its Index or with its channel name. I am trying with both as you can see in my QmixTC class. I am getting attribute error. In other class, there is no errors. Can someone help me understand where is the problem. I am putting my code as well as error trace. Thanks.
main code with name qmix.py (importing it in to sample code):
class QmixTC (object):
"""
"""
def __init__(self, index=0, handle=None,name=''):
self.dll_dir = DLL_DIR
self.dll_file = os.path.join(self.dll_dir,
'labbCAN_Controller_API.dll')
self._ffi = FFI()
self._ffi.cdef(CONTROLLER_HEADER)
self._dll = self._ffi.dlopen(self.dll_file)
self._handle = self._ffi.new('dev_hdl *', 0)
if handle is None:
self.index = index
self._handle = self._ffi.new('dev_hdl *', 0)
self._call('LCC_GetChannelHandle', self.index, self._handle)
else:
self.index = None
self._handle = handle
self._ch_name="QmixTC_1_DO0_INA"
self._channel = self._ch_name + str(index)
self._call('LCC_LookupChanByName',
bytes(self._channel,'utf8'),
self._handle)
self.name = name
def _call(self, func_name, *args):
func = getattr(self._dll, func_name)
r = func(*args)
r = CHK(r, func_name, *args)
return r
def Setpoint_write (self, setpoint):
"""
Write setpoint value to controller device.
Parameters
[in] ChanHdl Valid handle of open controller channel
[in] fSetPointValue The setpoint value to write
Returns
Error code - ERR_NOERR indicates success
"""
self._call('LCC_WriteSetPoint', self._handle[0], setpoint)
def enable_controllLoop (self, enable):
"""
Enables / disables a control loop.
If the control loop is enabled, then the output value is calculated periodically.
Parameters
ChanHdl Valid handle of a controller channel
Enable 1 = enable, 0 = disable
Returns
Error code - ERR_NOERR indicates success
"""
self._call('LCC_EnableControlLoop', self._handle[0], enable)
def read_temp_value (self, actualvalue):
"""
Read actual value from device.
Parameters
[in] ChanHdl Valid handle of open controller channel
[out] pfActualValue Returns the actual controller value
Returns
Error code - ERR_NOERR indicates success
"""
self._call('LCC_ReadActualValue', self._handle[0], actualvalue)
if __name__ == '__main__':
import os.path as op
dll_dir = op.normpath('C:\\Users\\Ravikumar\\AppData\\Local\\QmixSDK')
config_dir = op.normpath('C:\\Users\\Public\\Documents\\QmixElements\\Projects\\QmixTC_Pump\\Configurations\\QmixTC_pump')
bus = QmixBus(config_dir=config_dir)
bus.open()
bus.start()
controller_0 = QmixTC(index=0)
controller_0.enable_controllLoop(1)
sample program:
from __future__ import division, print_function
from win32api import GetSystemMetrics
import numpy as np
import os
import qmix
import pandas as pd
#%% CHANNEL INITIALIZATION
if __name__ == '__main__':
dll_dir = ('C:\\Users\\Ravikumar\\AppData\\Local\\QmixSDK')
config_dir = ('C:\\Users\\Public\\Documents\\QmixElements\\Projects\\QmixTC_test1\\Configurations\\QmixTC_test1')
qmix_bus = qmix.QmixBus(config_dir=config_dir,dll_dir=dll_dir)
qmix_bus.open()
qmix_bus.start()
controller_0 = qmix.QmixTC(index=0)
controller_0.Setpoint_write(50)
error:
Traceback (most recent call last):
File "<ipython-input-5-40d4a3db9493>", line 17, in <module>
controller_0 = qmix.QmixTC(index=0)
File "qmix.py", line 921, in __init__
self._call('LCC_GetChannelHandle', self.index, self._handle)
File "qmix.py", line 937, in _call
func = getattr(self._dll, func_name)
File "C:\Users\Ravikumar\Anaconda2\lib\site-packages\cffi\api.py", line 875, in __getattr__
make_accessor(name)
File "C:\Users\Ravikumar\Anaconda2\lib\site-packages\cffi\api.py", line 870, in make_accessor
raise AttributeError(name)
AttributeError: LCC_GetChannelHandle

Python Crash Course, Testing Your Code example

This is to test the class; however, I am getting an error and I do not know how to fix it.
import unittest
from name_function import get_formatted_name
class NamesTestCase(unittest.TestCase):
"""Tests for 'name_function.py'"""
def test_first_last_name(self):
"""Do names like 'Mark James' work?"""
formatted_name = get_formatted_name('mark','James')
self.assertEqual(formatted_name,'Mark James')
unittest.main()
Here's the class that is being tested.
def get_formatted_name(first, last):
"""This is where the name is formatted correctly"""
full_name = first + " " + last
return full_name.title()
The error I am getting is this:
/Desktop/python_work/test_name_function.py", line 1, in <module>
import unittest
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/unittest/__init__.py", line 58, in <module>
from .result import TestResult
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/unittest/result.py", line 5, in <module>
import traceback
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/traceback.py", line 3, in <module>
import linecache
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/linecache.py", line 10, in <module>
import tokenize
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/tokenize.py", line 96, in <module>
class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line')):
AttributeError: 'module' object has no attribute 'namedtuple'
Anyone have a clue?
I believe you need to add if 'name' == 'main':
unittest.main() to your code.
The below code should fix the error -
import unittest
from name_function import get_formatted_name
class NamesTestCase(unittest.TestCase):
"""Tests for 'name_function.py'"""
def test_first_last_name(self):
"""Do names like 'Mark James' work?"""
formatted_name = get_formatted_name('mark','James')
self.assertEqual(formatted_name,'Mark James')
if '__name__' == '__main__':
unittest.main()

Is there a bug in this python module or am I using it the wrong way?

lambda from getattr getting called with "connection" as a keyword argument? Am I misusing the code or is there a bug?
Code and traceback: https://github.com/bigcommerce/bigcommerce-api-python/issues/32
#!/usr/bin/env python2
import bigcommerce
import bigcommerce.api
BIG_URL = 'store-45eg5.mybigcommerce.com'
BIG_USER = 'henry'
BIG_KEY = '10f0f4f371f7953c4d7d7809b62463281f15c829'
api = bigcommerce.api.BigcommerceApi(host=BIG_URL, basic_auth=(BIG_USER, BIG_KEY))
def get_category_id(name):
get_request = api.Categories.get(name)
try:
cat_list = api.Categories.all(name=name)
if cat_list:
return cat_list[0]['id']
else:
return None
except:
return None
def create_category(name):
rp = api.Categories.create(name)
if rp.status == 201:
return rp.json()['id']
else:
return get_category_id(name)
create_category('anothertestingcat')
Gives this traceback:
Traceback (most recent call last):
File "./bigcommerceimporter.py", line 50, in
create_category('anothertestingcat')
File "./bigcommerceimporter.py", line 44, in create_category
rp = api.Categories.create(name)
File "/home/henry/big_test_zone/local/lib/python2.7/site-packages/bigcommerce/api.py", line 57, in
return lambda args, *kwargs: (getattr(self.resource_class, item))(args, connection=self.connection, *kwargs)
TypeError: create() got multiple values for keyword argument 'connection'
Line in api.py that the traceback refers to: https://github.com/bigcommerce/bigcommerce-api-python/blob/master/bigcommerce/api.py#L57
According to the examples, create should be used like this:
api.Categories.create(name = 'anothertestingcat')
Note: You should generate a new API KEY, since you published the current one in this question.

how to return a collection from mongodb using pymongo

I'm trying to create a Collection Class in Python to access the various collections in my db. Here's what I've got:
import sys
import os
import pymongo
from pymongo import MongoClient
class Collection():
client = MongoClient()
def __init__(self, db, collection_name):
self.db = db
self.collection_name = collection_name
# self.data_base = getattr(self.client, db)
# self.collObject = getattr(self.data_base, self.collection_name)
def getCollection(self):
data_base = getattr(self.client, self.db)
collObject = getattr(data_base, self.collection_name)
return collObject
def getCollectionKeys(self, collection):
"""Get a set of keys from a collection"""
keys_list = []
collection_list = collection.find()
for document in collection_list:
for field in document.keys():
keys_list.append(field)
keys_set = set(keys_list)
return keys_set
if __name__ == '__main__':
print"Begin Main"
agents = Collection('hkpr_restore','agents')
print "agents is" , agents
agents_collection = agents.getCollection
print agents_collection
print agents.getCollectionKeys(agents_collection)
I get the following output:
Begin Main
agents is <__main__.Collection instance at 0x10ff33e60>
<bound method Collection.getCollection of <__main__.Collection instance at 0x10ff33e60>>
Traceback (most recent call last):
File "collection.py", line 52, in <module>
print agents.getCollectionKeys(agents_collection)
File "collection.py", line 35, in getCollectionKeys
collection_list = collection.find()
AttributeError: 'function' object has no attribute 'find'
The function getCollectionKeys works fine outside of a class. What am I doing wrong?
This line:
agents_collection = agents.getCollection
Should be:
agents_collection = agents.getCollection()
Also, you don't need to use getattr the way you are. Your getCollection method can be:
def getCollection(self):
return self.client[self.db][self.collection_name]

Phyton3 error >> global name 'self' is not defined

I don't know why I am having this error. Please enlighten me. Here's my code:
filename: sqlfunc.py
from sqlalchemy import create_engine
class SQL:
def __init__(self):
self.connection_string = ''
self.sql = ''
def exec_nonquery(connection_string, sql):
self.connection_string = connection_string
self.sql = sql
self.__connection = self.__mydb(self.connection_string)
self.__transaction = self.__connection.begin()
try:
self.__connection.execute(self.sql).fetchall()
self.__transaction.commit()
except:
self.__transaction.rollback()
raise
_connection.close()
def exec_query(self, connection_string, sql):
self.connection_string = connection_string
self.sql = sql
self.__connection = self.__mydb(self.connection_string)
self.__result = None
self.query_result = []
try:
self.__result = self.__connection.execute(sql)
self.query_result = list(self.__result)
except:
raise
self.__connection.close()
return self.query_result
Now, I tried:
from sqlfunc import SQL
SQL.exec_nonquery('mssql+pyodbc://scott:tiger#mydsn','select * from table1')
and I got this error:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "./sqlfunc.py", line 25, in exec_nonquery
self.connection_string = connection_string
NameError: global name 'self' is not defined
Is there anything I did wrong or missing?
I changed exec_nonquery to
def exec_nonquery(self, connection_string, sql)
But it leads me to this error:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: exec_nonquery() missing 1 required positional argument: 'sql'
"Is there anything I did wrong or missing?" - yes, you failed to include self as the first positional argument to your method:
def exec_nonquery(connection_string, sql):
should be
def exec_nonquery(self, connection_string, sql):
# ^ see here
You are also trying to call this instance method on the class. The minimal fix is:
sql = SQL() # create instance
sql.exec_nonquery('mssql+pyodbc://scott:tiger#mydsn',
'select * from table1') # call method
but as Martijn points out you really should refactor the class to make the most of OOP.

Categories

Resources