I'm trying to use the langchain library to create a question-answering system, but when I try to search the document using the chromadb library, I get this error:
TypeError: create_collection() got an unexpected keyword argument 'embedding_fn'
Here's the code I'm working on:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import TextLoader
from langchain.vectorstores import Chroma
loader = TextLoader('./info.txt')
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)
embeddings = OpenAIEmbeddings()
docsearch = Chroma.from_documents(texts, embeddings)
The last line generates the error.
This is the complete error message:
TypeError Traceback (most recent call last)
Input In [36], in <cell line: 1>()
----> 1 docsearch = Chroma.from_documents(texts, embeddings)
File ~\anaconda3\lib\site-packages\langchain\vectorstores\chroma.py:212, in Chroma.from_documents(cls, documents, embedding, ids, collection_name, persist_directory, **kwargs)
210 texts = [doc.page_content for doc in documents]
211 metadatas = [doc.metadata for doc in documents]
--> 212 return cls.from_texts(
213 texts=texts,
214 embedding=embedding,
215 metadatas=metadatas,
216 ids=ids,
217 collection_name=collection_name,
218 persist_directory=persist_directory,
219 )
File ~\anaconda3\lib\site-packages\langchain\vectorstores\chroma.py:178, in Chroma.from_texts(cls, texts, embedding, metadatas, ids, collection_name, persist_directory, **kwargs)
151 @classmethod
152 def from_texts(
153 cls,
(...)
160 **kwargs: Any,
161 ) -> Chroma:
162 """Create a Chroma vectorstore from a raw documents.
163
164 If a persist_directory is specified, the collection will be persisted there.
(...)
176 Chroma: Chroma vectorstore.
177 """
--> 178 chroma_collection = cls(
179 collection_name=collection_name,
180 embedding_function=embedding,
181 persist_directory=persist_directory,
182 )
183 chroma_collection.add_texts(texts=texts, metadatas=metadatas, ids=ids)
184 return chroma_collection
File ~\anaconda3\lib\site-packages\langchain\vectorstores\chroma.py:65, in Chroma.__init__(self, collection_name, embedding_function, persist_directory)
60 logger.warning(
61 f"Collection {collection_name} already exists,"
62 " Do you have the right embedding function?"
63 )
64 else:
---> 65 self._collection = self._client.create_collection(
66 name=collection_name,
67 embedding_fn=self._embedding_function.embed_documents
68 if self._embedding_function is not None
69 else None,
70 )
TypeError: create_collection() got an unexpected keyword argument 'embedding_fn'
The create_collection method of chromadb.Client was changed 2 days ago and the embedding_fn parameter was renamed to embedding_function:
https://github.com/chroma-core/chroma/commit/6ce2388e219d47048e854be72be54617df647224
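For reference, after that commit a direct chromadb call takes the new keyword name. An illustrative sketch, where my_embedding_fn is a placeholder callable (not part of either library):
import chromadb

client = chromadb.Client()
collection = client.create_collection(
    name="my_collection",
    embedding_function=my_embedding_fn,  # formerly embedding_fn
)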
The source code for the langchain.vectorstores.chroma.Chroma class as of version 0.0.87 seems to have been updated already (3 hours before you asked the question) to match the chromadb library:
https://github.com/hwchase17/langchain/commit/34cba2da3264ccc9100f7efd16807c8d2a51734c
So you should be able to fix the problem by installing the newest version of LangChain.
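For example, with pip:
$ pip install -U langchain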
I am trying to create a series of charts and save them iteratively.
The creation works well but while saving I get the following error:
---------------------------------------------------------------------------
NoMatchingVersions Traceback (most recent call last)
<ipython-input-103-e75c3f4b4fa5> in <module>
29 chart=(background + chart).configure_view(stroke='white')
30 filename = f"{scenario}.svg"
---> 31 save(chart, filename, method='selenium', webdriver=driver)
~\Anaconda3\lib\site-packages\altair_saver\_core.py in save(chart, fp, fmt, mode, method, **kwargs)
75 saver = Saver(spec, mode=mode, **kwargs)
76
---> 77 saver.save(fp=fp, fmt=fmt)
78
79
~\Anaconda3\lib\site-packages\altair_saver\savers\_saver.py in save(self, fp, fmt)
86 raise ValueError(f"Got fmt={fmt}; expected one of {self.valid_formats}")
87
---> 88 content = self.mimebundle(fmt).popitem()[1]
89 if isinstance(content, dict):
90 with maybe_open(fp, "w") as f:
~\Anaconda3\lib\site-packages\altair_saver\savers\_saver.py in mimebundle(self, fmts)
66 f"invalid fmt={fmt!r}; must be one of {self.valid_formats}."
67 )
---> 68 bundle.update(self._mimebundle(fmt))
69 return bundle
70
~\Anaconda3\lib\site-packages\altair_saver\savers\_selenium.py in _mimebundle(self, fmt)
249
250 def _mimebundle(self, fmt: str) -> Mimebundle:
--> 251 out = self._extract(fmt)
252 mimetype = fmt_to_mimetype(
253 fmt,
~\Anaconda3\lib\site-packages\altair_saver\savers\_selenium.py in _extract(self, fmt)
209 js_resources = {
210 "vega.js": get_bundled_script("vega", self._vega_version),
--> 211 "vega-lite.js": get_bundled_script("vega-lite", self._vegalite_version),
212 "vega-embed.js": get_bundled_script(
213 "vega-embed", self._vegaembed_version
~\Anaconda3\lib\site-packages\altair_viewer\_scripts.py in get_bundled_script(package, version)
36 f"package {package!r} not recognized. Available: {list(listing)}"
37 )
---> 38 version_str = find_version(version, listing[package])
39 content = pkgutil.get_data("altair_viewer", f"scripts/{package}-{version_str}.js")
40 if content is None:
~\Anaconda3\lib\site-packages\altair_viewer\_utils.py in find_version(version, candidates, strict_micro)
190 if not matches:
191 raise NoMatchingVersions(
--> 192 f"No matches for version={version!r} among {candidates}"
193 )
194 return str(matches[-1])
NoMatchingVersions: No matches for version='4.8.1' among ['4.0.2']
I am using selenium and altair_saver:
from altair_saver import save
from selenium import webdriver
driver = webdriver.Chrome(executable_path=r'pathtochromedriver/chromedriver_win32/chromedriver.exe')
for i, scenario in enumerate(scenario_columns):
chart=makechart(scenario, i)
filename = f"{scenario}.svg"
save(chart, filename, method='selenium', webdriver=driver)
Here `scenario` is a string without special characters.
You need to update the altair_viewer package to a newer version:
$ pip install -U altair_viewer
(This error message was improved in https://github.com/altair-viz/altair_viewer/pull/33, so it shouldn't be as mysterious when it comes up in the future.)
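If you want to confirm which version you ended up with, the package exposes a version string (assuming a standard install):
import altair_viewer
print(altair_viewer.__version__)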
I am a beginner developer who has started studying automation using pywinauto.
An OverflowError occurs when using application.connect() to connect to an already open program, but application.start() works fine.
Please help if you know what's going on here.
The source code and error contents are as follows.
Source code:
import pywinauto
app = pywinauto.application.Application()
app.connect(title_re='Calculator')
Error:
OverflowError Traceback (most recent call last)
in
1 import pywinauto
2 app = pywinauto.application.Application()
----> 3 app.connect(title_re='Calculator')
d:\Anaconda3\lib\site-packages\pywinauto\application.py in connect(self, **kwargs)
972 ).process_id
973 else:
--> 974 self.process = findwindows.find_element(**kwargs).process_id
975 connected = True
976
d:\Anaconda3\lib\site-packages\pywinauto\findwindows.py in find_element(**kwargs)
82 so please see :py:func:find_elements for the full parameters description.
83 """
---> 84 elements = find_elements(**kwargs)
85
86 if not elements:
d:\Anaconda3\lib\site-packages\pywinauto\findwindows.py in find_elements(class_name, class_name_re, parent, process, title, title_re, top_level_only, visible_only, enabled_only, best_match, handle, ctrl_index, found_index, predicate_func, active_only, control_id, control_type, auto_id, framework_id, backend, depth)
279 return title_regex.match(t)
280 return False
--> 281 elements = [elem for elem in elements if _title_match(elem)]
282
283 if visible_only:
d:\Anaconda3\lib\site-packages\pywinauto\findwindows.py in <listcomp>(.0)
279 return title_regex.match(t)
280 return False
--> 281 elements = [elem for elem in elements if _title_match(elem)]
282
283 if visible_only:
d:\Anaconda3\lib\site-packages\pywinauto\findwindows.py in _title_match(w)
275 def _title_match(w):
276 """Match a window title to the regexp"""
--> 277 t = w.rich_text
278 if t is not None:
279 return title_regex.match(t)
d:\Anaconda3\lib\site-packages\pywinauto\win32_element_info.py in rich_text(self)
81 def rich_text(self):
82 """Return the text of the window"""
---> 83 return handleprops.text(self.handle)
84
85 name = rich_text
d:\Anaconda3\lib\site-packages\pywinauto\handleprops.py in text(handle)
86 length += 1
87
---> 88 buffer_ = ctypes.create_unicode_buffer(length)
89
90 ret = win32functions.SendMessage(
d:\Anaconda3\lib\ctypes\__init__.py in create_unicode_buffer(init, size)
286 return buf
287 elif isinstance(init, int):
--> 288 buftype = c_wchar * init
289 buf = buftype()
290 return buf
OverflowError: cannot fit 'int' into an index-sized integer
Try this if you are having the problem: you have to specify that the backend is 'uia'. It's working fine for me.
import pywinauto
app = pywinauto.Application(backend='uia').start('calc.exe')
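If you need to attach to the already-running Calculator rather than start a new one, the same backend argument applies. A minimal sketch, assuming the window title matches 'Calculator':
from pywinauto import Application

# Use the UIA backend when connecting to modern Windows apps like Calculator
app = Application(backend='uia').connect(title_re='Calculator')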
I've just been moving some code over to an Ubuntu 16.04.2 Anaconda setup, and am getting a TypeError I don't understand when calling code that works fine across numerous other machines.
The error replicates for me with the "list all tennis markets" sample code in the repo below, as well as with a request like:
from betfair import Betfair
client = Betfair("app key needed here", "path to ssh key here")
client.login(username, password)
client.keep_alive()
client.list_market_book(market_ids=['1.135391020'], price_projection=dict(priceData=['EX_BEST_OFFERS']))
or
from betfair.models import MarketFilter
event_types = client.list_event_types(
MarketFilter(text_query='tennis')
)
print(len(event_types)) # 2
print(event_types[0].event_type.name) # 'Tennis'
tennis_event_type = event_types[0]
markets = client.list_market_catalogue(
MarketFilter(event_type_ids=[tennis_event_type.event_type.id])
)
markets[0].market_name
Both throw the following type error despite identical code working on a windows installation:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-3-69b51cf78438> in <module>()
1 event_types = client.list_event_types(
----> 2 MarketFilter(text_query='tennis')
3 )
4 print(len(event_types)) # 2
5 print(event_types[0].event_type.name) # 'Tennis'
<decorator-gen-125> in list_event_types(self, filter, locale)
/home/user/anaconda2/lib/python2.7/site-packages/betfair/utils.pyc in requires_login(func, *args, **kwargs)
121 self = args[0]
122 if self.session_token:
--> 123 return func(*args, **kwargs)
124 raise exceptions.NotLoggedIn()
/home/user/anaconda2/lib/python2.7/site-packages/betfair/betfair.pyc in list_event_types(self, filter, locale)
148 'listEventTypes',
149 utils.get_kwargs(locals()),
--> 150 model=models.EventTypeResult,
151 )
152
/home/user/anaconda2/lib/python2.7/site-packages/betfair/betfair.pyc in make_api_request(self, base, method, params, codes, model)
87 utils.check_status_code(response, codes=codes)
88 result = utils.result_or_error(response)
---> 89 return utils.process_result(result, model)
90
91 # Authentication methods
/home/user/anaconda2/lib/python2.7/site-packages/betfair/utils.pyc in process_result(result, model)
81 return result
82 if isinstance(result, collections.Sequence):
---> 83 return [model(**item) for item in result]
84 return model(**result)
85
/home/user/anaconda2/lib/python2.7/site-packages/betfair/meta/models.pyc in __init__(self, **data)
24 def __init__(self, **data):
25 super(BetfairModel, self).__init__()
---> 26 self.import_data(data)
27
28 def import_data(self, data, **kwargs):
/home/user/anaconda2/lib/python2.7/site-packages/betfair/meta/models.pyc in import_data(self, data, **kwargs)
28 def import_data(self, data, **kwargs):
29 kwargs['strict'] = False
---> 30 return super(BetfairModel, self).import_data(data, **kwargs)
/home/user/anaconda2/lib/python2.7/site-packages/schematics/models.pyc in import_data(self, raw_data, recursive, **kwargs)
269 The data to be imported.
270 """
--> 271 data = self._convert(raw_data, trusted_data=_dict(self), recursive=recursive, **kwargs)
272 self._data.converted.update(data)
273 if kwargs.get('validate'):
/home/user/anaconda2/lib/python2.7/site-packages/schematics/models.pyc in _convert(self, raw_data, context, **kwargs)
293 should_validate = getattr(context, 'validate', kwargs.get('validate', False))
294 func = validate if should_validate else convert
--> 295 return func(self._schema, self, raw_data=raw_data, oo=True, context=context, **kwargs)
296
297 def export(self, field_converter=None, role=None, app_data=None, **kwargs):
/home/user/anaconda2/lib/python2.7/site-packages/schematics/transforms.pyc in convert(cls, mutable, raw_data, **kwargs)
427
428 def convert(cls, mutable, raw_data=None, **kwargs):
--> 429 return import_loop(cls, mutable, raw_data, import_converter, **kwargs)
430
431
/home/user/anaconda2/lib/python2.7/site-packages/schematics/transforms.pyc in import_loop(schema, mutable, raw_data, field_converter, trusted_data, mapping, partial, strict, init_values, apply_defaults, convert, validate, new, oo, recursive, app_data, context)
153 field_context = context
154 try:
--> 155 value = _field_converter(field, value, field_context)
156 except (FieldError, CompoundError) as exc:
157 errors[serialized_field_name] = exc
/home/user/anaconda2/lib/python2.7/site-packages/schematics/transforms.pyc in __call__(self, *args)
354
355 def __call__(self, *args):
--> 356 return self.func(*args)
357
358
/home/user/anaconda2/lib/python2.7/site-packages/schematics/transforms.pyc in import_converter(field, value, context)
382 if value is None or value is Undefined:
383 return value
--> 384 return field.convert(value, context)
385
386
/home/user/anaconda2/lib/python2.7/site-packages/schematics/types/compound.pyc in convert(self, value, context)
34 def convert(self, value, context=None):
35 context = context or get_import_context()
---> 36 return self._convert(value, context)
37
38 def _convert(self, value, context):
/home/user/anaconda2/lib/python2.7/site-packages/schematics/types/compound.pyc in _convert(self, value, context)
131 "Input must be a mapping or '%s' instance" % self.model_class.__name__)
132 if context.convert and context.oo:
--> 133 return model_class(value, context=context)
134 else:
135 return model_class.convert(value, context=context)
TypeError: __init__() takes exactly 1 argument (3 given)
Somewhat weirder, a request like:
client.list_market_catalogue(MarketFilter(market_ids=['1.135391020']))
works fine.
python 2.7.13, Anaconda 4.4.0, Ubuntu 16.04.2
Any idea what could be causing this?
From the trace, it looks to me like the schematics library is your issue. Checking the open issues on the betfair GitHub repo, there is, as of this writing, an open ticket regarding schematics breaking the API. It appears the author has left schematics out of the requirements and that version 1.1.1 is required. My guess is that you have schematics 2.0 installed on the machine causing the issue.
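If that's the case, pinning schematics should resolve it (assuming 1.1.1 is indeed the version the library expects, per that ticket):
$ pip install schematics==1.1.1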
One way to find this in the future would be to pip freeze the working environment and diff it against the broken environment. Moreover, when moving to a new machine, you can use the output of pip freeze to duplicate the environment and avoid messy version issues like this.
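A minimal sketch of that workflow (the file names are illustrative):
$ pip freeze > working.txt     # on the machine where the code works
$ pip freeze > broken.txt      # on the machine where it fails
$ diff working.txt broken.txt  # spot mismatched package versions
$ pip install -r working.txt   # or replicate the working environment outright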
I have two sections of code. One works, and one does not.
The following code runs as expected without error: (Note: postrain, negtrain, postest, and negtest are lists of strings defined earlier.)
from sklearn.feature_extraction.text import CountVectorizer
vector = CountVectorizer()
train_vector = vector.fit_transform(postrain+negtrain)
test_vector = vector.transform(postest+negtest)
print test_vector.shape
However, this code throws an error:
import re
stop = [re.split('\n|\t', open('stop_words.txt').read())]
vector2 = CountVectorizer(stop_words=stop)
train_vector = vector2.fit_transform(postrain+negtrain) # <-- Error occurs here
test_vector = vector2.transform(postest+negtest)
print test_vector.shape
the error:
TypeError Traceback (most recent call last)
<ipython-input-43-cf5f4754d58c> in <module>()
7
8 vector2 = CountVectorizer(stop_words=stop)
----> 9 train_vector = vector2.fit_transform(postrain+negtrain)
10 test_vector = vector2.transform(postest+negtest)
11
C:\Users\Nsth\Anaconda2\envs\cs489\lib\site-packages\sklearn\feature_extraction\text.pyc in fit_transform(self, raw_documents, y)
815
816 vocabulary, X = self._count_vocab(raw_documents,
--> 817 self.fixed_vocabulary_)
818
819 if self.binary:
C:\Users\Nsth\Anaconda2\envs\cs489\lib\site-packages\sklearn\feature_extraction\text.pyc in _count_vocab(self, raw_documents, fixed_vocab)
745 vocabulary.default_factory = vocabulary.__len__
746
--> 747 analyze = self.build_analyzer()
748 j_indices = _make_int_array()
749 indptr = _make_int_array()
C:\Users\Nsth\Anaconda2\envs\cs489\lib\site-packages\sklearn\feature_extraction\text.pyc in build_analyzer(self)
232
233 elif self.analyzer == 'word':
--> 234 stop_words = self.get_stop_words()
235 tokenize = self.build_tokenizer()
236
C:\Users\Nsth\Anaconda2\envs\cs489\lib\site-packages\sklearn\feature_extraction\text.pyc in get_stop_words(self)
215 def get_stop_words(self):
216 """Build or fetch the effective stop words list"""
--> 217 return _check_stop_list(self.stop_words)
218
219 def build_analyzer(self):
C:\Users\Nsth\Anaconda2\envs\cs489\lib\site-packages\sklearn\feature_extraction\text.pyc in _check_stop_list(stop)
92 return None
93 else: # assume it's a collection
---> 94 return frozenset(stop)
95
96
TypeError: unhashable type: 'list'
How did adding stop words cause the error?
I'm dumb. It should have been:
stop = re.split('\n|\t', open('stop_words.txt').read())
without the brackets. I'm not sure why it threw the error on the line after that, though.
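(Why the error showed up one line late: as the traceback shows, scikit-learn defers validation, so `stop_words` is only checked inside get_stop_words() when fit_transform builds the analyzer, not when CountVectorizer is constructed. A minimal corrected sketch, assuming stop_words.txt is newline/tab-delimited:)
import re
from sklearn.feature_extraction.text import CountVectorizer

# Split the file into a flat list of stop-word strings (no wrapping brackets)
stop = re.split('\n|\t', open('stop_words.txt').read())

vector2 = CountVectorizer(stop_words=stop)
train_vector = vector2.fit_transform(postrain + negtrain)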
data = sqlContext.sql("select a.churn,b.pay_amount,c.all_balance from db_bi.t_cust_churn a left join db_bi.t_cust_pay b on a.cust_id=b.cust_id left join db_bi.t_cust_balance c on a.cust_id=c.cust_id limit 5000").cache()
def labelData(df):
return df.map(lambda row: LabeledPoint(row[0], row[1:]))
traindata = labelData(data)  # this step works well
from pyspark.ml.classification import LogisticRegression
lr = LogisticRegression(maxIter=10, regParam=0.3, elasticNetParam=0.8)
lrModel = lr.fit(lrdata)
AttributeError Traceback (most recent call last)
<ipython-input-40-b84a106121e6> in <module>()
----> 1 lrModel = lr.fit(lrdata)
/home/hadoop/spark/python/pyspark/ml/pipeline.pyc in fit(self, dataset, params)
67 return self.copy(params)._fit(dataset)
68 else:
---> 69 return self._fit(dataset)
70 else:
71 raise ValueError("Params must be either a param map or a list/tuple of param maps, "
/home/hadoop/spark/python/pyspark/ml/wrapper.pyc in _fit(self, dataset)
131
132 def _fit(self, dataset):
--> 133 java_model = self._fit_java(dataset)
134 return self._create_model(java_model)
135
/home/hadoop/spark/python/pyspark/ml/wrapper.pyc in _fit_java(self, dataset)
128 """
129 self._transfer_params_to_java()
--> 130 return self._java_obj.fit(dataset._jdf)
131
132 def _fit(self, dataset):
AttributeError: 'PipelinedRDD' object has no attribute '_jdf'
I guess you are using the tutorial for the latest Spark version (2.0.1), which uses from pyspark.ml.classification import LogisticRegression, whereas you need the one for some other version, e.g. 1.6.2, which uses from pyspark.mllib.classification import LogisticRegressionWithLBFGS, LogisticRegressionModel. Note the different libraries: pyspark.ml estimators expect a DataFrame, while pyspark.mllib works on RDDs of LabeledPoint (which is what your labelData produces), hence the 'PipelinedRDD' object has no attribute '_jdf' error.
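A minimal sketch of the RDD-based API, assuming traindata is the RDD of LabeledPoint built by labelData above (the parameter values are illustrative):
from pyspark.mllib.classification import LogisticRegressionWithLBFGS

# mllib estimators take an RDD[LabeledPoint] directly, so no DataFrame conversion is needed
lrModel = LogisticRegressionWithLBFGS.train(traindata, iterations=10)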