Cannot load graphlab.sframe - python

I have joined the Machine Learning course on Coursera. I am facing an issue while executing the following command:
sales = graphlab.SFrame('home_data.gl/')
The error is as follows:
IOError Traceback (most recent call last)
<ipython-input-9-e5b5a1ead746> in <module>()
----> 1 sales = graphlab.SFrame('home_data.gl')
C:\Users\admin\Anaconda2\envs\gl-env\lib\site-packages\graphlab
\data_structures\sframe.pyc in __init__(self, data, format, _proxy)
951 pass
952 else:
--> 953 raise ValueError('Unknown input type: ' + format)
954
955 sframe_size = -1
C:\Users\admin\Anaconda2\envs\gl-env\lib\site-packages\graphlab\cython\context.pyc in __exit__(self, exc_type, exc_value, traceback)
47 if not self.show_cython_trace:
48 # To hide cython trace, we re-raise from here
---> 49 raise exc_type(exc_value)
50 else:
51 # To show the full trace, we do nothing and let exception propagate
IOError: Cannot open C:/Users/admin/home_data.gl/dir_archive.ini for read. Cannot open C:/Users/admin/home_data.gl/dir_archive.ini for reading
Can you please help me to resolve this issue?

Go to terminal and run:
unzip home_data.gl.zip
You will see the following files in the directory home_data.gl:
Now in ipython, run:
sales = graphlab.SFrame('home_data.gl/')
sales
which will display the data in tabular format:

Related

How to create a Great Expectations Suite from a Pandas Profiling Report

As already stated in the title, I want to generate so-called 'assertions' via Great Expectations. I've done it the normal way by creating a connection to a datasource. Now I want to combine it with Pandas Profiling, i.e. create an Expectation Suite based on a Profiling Report. According to the documentation, it should look something like this. However, it does not work, as you can see in the error below.
import great_expectations as ge
import pandas as pd
from pandas_profiling import ProfileReport
import os
p = os.getcwd()
p += "\data\cars.csv"
df = pd.read_csv(p)
profile = ProfileReport(df, title="Pandas Profiling Report", explorative=True)
# Example 1
# Obtain expectation suite, this includes profiling the dataset, saving the expectation suite, validating the
# dataframe, and building data docs
suite = profile.to_expectation_suite(suite_name="cars_expectations")
That throws the following error:
Summarize dataset: 100%
81/81 [00:37<00:00, 3.01it/s, Completed]
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\great_expectations\data_context\data_context\base_data_context.py in run_validation_operator(self, validation_operator_name, assets_to_validate, run_id, evaluation_parameters, run_name, run_time, result_format, **kwargs)
510 try:
--> 511 validation_operator = self.validation_operators[validation_operator_name]
512 except KeyError:
KeyError: 'action_list_operator'
During handling of the above exception, another exception occurred:
DataContextError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_4484/2792258824.py in <module>
16 # Obtain expectation suite, this includes profiling the dataset, saving the expectation suite, validating the
17 # dataframe, and building data docs
---> 18 suite = profile.to_expectation_suite(suite_name="cars_expectations")
C:\ProgramData\Anaconda3\lib\site-packages\pandas_profiling\expectations_report.py in to_expectation_suite(self, suite_name, data_context, save_suite, run_validation, build_data_docs, handler)
101 batch = ge.dataset.PandasDataset(self.df, expectation_suite=suite)
102
--> 103 results = data_context.run_validation_operator(
104 "action_list_operator", assets_to_validate=[batch]
105 )
C:\ProgramData\Anaconda3\lib\site-packages\great_expectations\core\usage_statistics\usage_statistics.py in usage_statistics_wrapped_method(*args, **kwargs)
302 nested_update(event_payload, args_payload_fn(*args, **kwargs))
303
--> 304 result = func(*args, **kwargs)
305 message["success"] = True
306 except Exception:
C:\ProgramData\Anaconda3\lib\site-packages\great_expectations\data_context\data_context\base_data_context.py in run_validation_operator(self, validation_operator_name, assets_to_validate, run_id, evaluation_parameters, run_name, run_time, result_format, **kwargs)
511 validation_operator = self.validation_operators[validation_operator_name]
512 except KeyError:
--> 513 raise ge_exceptions.DataContextError(
514 f"No validation operator `{validation_operator_name}` was found in your project. Please verify this in your great_expectations.yml"
515 )
DataContextError: No validation operator `action_list_operator` was found in your project. Please verify this in your great_expectations.yml
I am using:
Pandas-Profiling 3.4.0,
Great Expectations 0.15.32
Thanks for your help in advance.

TypeError: invalid path or file error even when the file exists

My code raised "invalid path or file" error even when the file exists. When I check the list of files in the path, it shows "permission denied" even though I'm root.
import rasterio
sample = pd.read_csv(os.path.join(config.BASE_PATH, "sample_submission.csv"))
test_images = glob.glob(os.path.join(config.BASE_PATH + "test_images", "**", "*.tiff"), recursive=True)
class HuBMAPDataset:
def __init__(self, idx, sz=sz, reduce=reduce):
self.data = rasterio.open(test_images, transform = identity, num_threads='all_cpus')
for idx,row in tqdm(sample.iterrows(),total=len(sample)):
idx = str(row['id'])
ds = HuBMAPDataset(idx)
Traceback:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Input In [89], in <cell line: 2>()
2 for idx,row in tqdm(sample.iterrows(),total=len(sample)):
3 idx = str(row['id'])
----> 4 ds = HuBMAPDataset(idx)
5 #rasterio cannot be used with multiple workers
6 dl = DataLoader(ds,bs,num_workers=0,shuffle=False,pin_memory=True)
Input In [85], in HuBMAPDataset.__init__(self, idx, sz, reduce)
14 def __init__(self, idx, sz=sz, reduce=reduce):
15 #self.data = rasterio.open(os.path.join(config.BASE_PATH, test_images,idx+'.tiff'), transform = identity, num_threads='all_cpus')
---> 16 self.data = rasterio.open(test_images, transform = identity, num_threads='all_cpus')
18 # some images have issues with their format
19 # and must be saved correctly before reading with rasterio
20 if self.data.count != 3:
File ~/anaconda3/lib/python3.9/site-packages/rasterio/env.py:442, in ensure_env_with_credentials.<locals>.wrapper(*args, **kwds)
439 session = DummySession()
441 with env_ctor(session=session):
--> 442 return f(*args, **kwds)
File ~/anaconda3/lib/python3.9/site-packages/rasterio/__init__.py:189, in open(fp, mode, driver, width, height, count, crs, transform, dtype, nodata, sharing, **kwargs)
183 if not isinstance(fp, str):
184 if not (
185 hasattr(fp, "read")
186 or hasattr(fp, "write")
187 or isinstance(fp, (os.PathLike, MemoryFile, FilePath))
188 ):
--> 189 raise TypeError("invalid path or file: {0!r}".format(fp))
190 if mode and not isinstance(mode, str):
191 raise TypeError("invalid mode: {0!r}".format(mode))
TypeError: invalid path or file: ['./input/hubmap-organ-segmentation/test_images/10078.tiff']
File exists in path but permission denied.
!./input/hubmap-organ-segmentation/test_images/10078.tiff
/bin/bash: ./input/hubmap-organ-segmentation/test_images/10078.tiff: Permission denied
This is because you do not have permissions to either read or write to that file/directory.
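Try updating the permissions of the directory by running chmod -R 660 folder_name. This command recursively sets the permissions to read and write for the owner and group; then try again. Note that mode 660 also clears the execute bit, which directories need in order to be traversed, so if the folder becomes inaccessible, use chmod -R u+rwX,g+rwX folder_name instead.
Also, it is possible that the root user does have permissions, but the Anaconda process you are running was started by a different user.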
Try to run anaconda with root user and test, although it is not recommended. Would be much better to fix the permissions for your user. It's safer.

Invalid kube-config file. No configuration found

I am new to kubectl and kserve.
Tried to implement and create inference service using the below tutorial.
https://www.kubeflow.org/docs/external-add-ons/kserve/first_isvc_kserve/
But while creating the InferenceService, I am getting the error below. Can someone help me with this?
KServe = KServeClient()
KServe.create(isvc)
Error:
---------------------------------------------------------------------------
ConfigException Traceback (most recent call last)
<ipython-input-7-0b03661604ad> in <module>()
----> 1 KServe = KServeClient()
2 KServe.create(isvc)
2 frames
/usr/local/lib/python3.7/dist-packages/kubernetes/config/kube_config.py in _get_kube_config_loader(filename, config_dict, persist_config, **kwargs)
766 if kcfg.config is None:
767 raise ConfigException(
--> 768 'Invalid kube-config file. '
769 'No configuration found.')
770 return KubeConfigLoader(
**ConfigException: Invalid kube-config file. No configuration found.**

Unable to export a python chainladder triangle to excel sheet

I am exploring the chainladder package. I tried to export a triangle structure into an Excel sheet, but it throws an error. Has anyone ever faced this kind of problem? I am using chainladder==0.7.9 with pandas==0.24.2. Here is my simple code, written by following their documentation: https://chainladder-python.readthedocs.io/en/latest/tutorials/index.html
import pandas as pd
import numpy as np
import chainladder as cl
raa = cl.load_sample('raa')
cl.load_template('triangle', triangle=raa.latest_diagonal).to_excel('raa_example.xlsx')
I get the following error:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
e:\pyworkspace37\chainladderdemo\venv\lib\site-packages\chainladder\utils\exhibits.py in load_template(template, env, **kwargs)
24 try:
---> 25 return load_yaml(template, env, **kwargs)
26 except:
e:\pyworkspace37\chainladderdemo\venv\lib\site-packages\xlcompose\templates.py in load_yaml(template, env, str_only, **kwargs)
108 else:
--> 109 return _make_xlc(yaml.load(template, Loader=yaml.SafeLoader), **kwargs)
110
e:\pyworkspace37\chainladderdemo\venv\lib\site-packages\xlcompose\templates.py in _make_xlc(template, **kwargs)
51 return core.Tabs(*[('Sheet1', item) for item in tabs])
---> 52 key = list(template.keys())[0]
53 if key in ['Row', 'Column']:
AttributeError: 'str' object has no attribute 'keys'
During handling of the above exception, another exception occurred:
AttributeError Traceback (most recent call last)
<ipython-input-17-270670213d97> in <module>
----> 1 cl.load_template('triangle', triangle=raa.latest_diagonal).to_excel('raa_example.xlsx')
2 #,type(raa.latest_diagonal)
3 type(raa_model.ultimate_)
e:\pyworkspace37\chainladderdemo\venv\lib\site-packages\chainladder\utils\exhibits.py in load_template(template, env, **kwargs)
26 except:
27 template = os.path.join(path, "templates", template.lower() + ".yaml")
---> 28 return load_yaml(template, env, **kwargs)
e:\pyworkspace37\chainladderdemo\venv\lib\site-packages\xlcompose\templates.py in load_yaml(template, env, str_only, **kwargs)
107 return template
108 else:
--> 109 return _make_xlc(yaml.load(template, Loader=yaml.SafeLoader), **kwargs)
110
111 def load_json(template, env=None, **kwargs):
e:\pyworkspace37\chainladderdemo\venv\lib\site-packages\xlcompose\templates.py in _make_xlc(template, **kwargs)
50 except:
51 return core.Tabs(*[('Sheet1', item) for item in tabs])
---> 52 key = list(template.keys())[0]
53 if key in ['Row', 'Column']:
54 return getattr(core, key)(*[_make_xlc(element, **kwargs)
AttributeError: 'str' object has no attribute 'keys'
Please let me know if I am missing something silly.
load_template is used to load a YAML template containing the specs for your Excel file. This particular template file is designed to create a standard exhibit for regular triangles, not diagonals. Templates are used to hold complex layouts, formatting, and logic.
This should resolve the issue:
cl.load_template('triangle', triangle=raa).to_excel('raa_example.xlsx')
If you would simply like to export just the diagonal to Excel, you can do so without a template:
raa.latest_diagonal.to_excel('raa_example.xlsx')
# or
cl.DataFrame(raa.latest_diagonal).to_excel('raa_example.xlsx')

Runtime error in apply() function of python

I am doing a ML course on Coursera
When I run the following command
sf['Country'] = sf['Country'].apply(transform_country)
The following is the error I get:
RuntimeError Traceback (most recent call last)
<ipython-input-10-e97a176c3eea> in <module>()
----> 1 sf['Country'] = sf['Country'].apply(transform_country)
F:\Anaconda2\envs\gl-env\lib\site-packages\graphlab\data_structures\sarray.pyc in apply(self, fn, dtype, skip_undefined, seed)
1892
1893 with cython_context():
-> 1894 return SArray(_proxy=self.__proxy__.transform(fn, dtype, skip_undefined, seed))
1895
1896
F:\Anaconda2\envs\gl-env\lib\site-packages\graphlab\cython\context.pyc in __exit__(self, exc_type, exc_value, traceback)
47 if not self.show_cython_trace:
48 # To hide cython trace, we re-raise from here
---> 49 raise exc_type(exc_value)
50 else:
51 # To show the full trace, we do nothing and let exception propagate
RuntimeError: Runtime Exception. Cannot evaluate lambda. Lambda workers cannot not start.
What do I do now?

Categories

Resources