I am trying to get the names of columns in pandas.DataFrame using C API for python and numpy. I am using Microsoft Visual Studio 2015 under Windows 7 and 64 bit python v3.6. I am able to get to the df.axes[1].values, however when I use PyArray_GETPTR1, the result seems to be invalid.
Here is my code:
// Initialize python
Py_SetPythonHome(L"C:\\Program Files\\Python36");
Py_Initialize();
// Initialize numpy
import_array();
// Build script
PyCompilerFlags flags;
flags.cf_flags = PyCF_SOURCE_IS_UTF8;
PyObject* compiled = Py_CompileStringFlags(
"import numpy as np\n"
"import pandas as pd\n"
"def ReturnDataFrame():\n"
" df = pd.DataFrame({'AAA' : [4,5,6,7], 'BBB' : [10,20,30,40],'CCC' : [100,50,-30,-50]})\n"
" return df\n",
"test.py", Py_file_input, &flags
);
PyObject* codeModule = PyImport_ExecCodeModule("test.py", compiled);
// Call the function
PyObject* pFunc = PyObject_GetAttrString(codeModule, "ReturnDataFrame");
PyObject* pReturn = PyObject_CallObject(pFunc, nullptr);
Py_XDECREF(pFunc);
// Display column headers
PyObject* axes = PyObject_GetAttrString(pReturn, "axes");
PyObject* columnLabelsIndex = PyList_GET_ITEM(axes, 1);
PyObject* columns = PyObject_GetAttrString(columnLabelsIndex, "values");
const char* typeName = columns->ob_type->tp_name; // == "numpy.ndarray"
PyArrayObject* columnArray = (PyArrayObject*)columns;
int length = PyArray_DIMS(columnArray)[0];
int elementType = PyArray_TYPE(columnArray); // == NPY_OBJECT
for (int i = 0; i < length; i++)
{
PyObject* pElementValue = (PyObject*)PyArray_GETPTR1(columnArray, i);
typeName = pElementValue->ob_type->tp_name; // invalid
char* str = PyUnicode_AsUTF8(pElementValue);
std::cout << str;
std::cout << " ";
}
std::cout << "\n";
I would appreciate any input on why these values are invalid.
Related
i am trying to use https://github.com/Spyros-DC/words-in-some-editdistance/blob/master/my_distance.cpp C++ implementation in python, however i kept receiving Segmentation fault (core dumped). Below is the entire code in python and C++, i have edited small parts of the original library to suit my use case. I have managed to find that when trying to use the unordered_map children.count(remaining_w.at(0)) it is throwing the segmentation fault error. I was wondering if anyone knows what is causing the error. Thank you so much in advance.
#include <iostream>
#include <unordered_map>
#include <string>
#include <fstream>
#include <unordered_set>
#include <vector>
#include <sstream>
#include <typeinfo>
using namespace std;
// using namespace std::chrono;
class trie{
public:
string word;
unordered_map<char, trie*> children;
// this function works
// with the argument flag set to zero
int insert(string w, int flag, string remaining_w = ""){
//the first time we call insert
if(flag == 0)
remaining_w = w;
int the_size = remaining_w.size();
if(children.count(remaining_w.at(0)) == 0){
children[remaining_w.at(0)] = new trie();
}
if(the_size == 0){
word = w;
return 0;
}else{
//the recursive calls with flag one
children[remaining_w.at(0)]->insert(w, 1, remaining_w.erase(0, 1));
return 0;
}
}
};
class AutoCorrect{
public:
// The tree
trie tree;
//the dictionary with the words
const int max_cost = 2;
const int too_big_distance = 10;
void insert(char* word){
ifstream ifp(word);
while(ifp >> word){
cout << word <<endl;
tree.insert(word, 0);
// }
}
}
void test(char* test){
cout << test << endl;
}
void search_recursive(trie* p_tree, char ch, const string& word, vector<int>& previous_row, int max_cost, unordered_map <string, int>& results)
{
int sz = previous_row.size();
int min_row = 12;
vector<int> current_row(sz, too_big_distance);
current_row[0] = previous_row[0] + 1;
// Calculate the min cost of insertion, deletion, match or substution
int insert_or_del, replace;
for (int i = 1; i < sz; i++) {
insert_or_del = min(current_row[i-1] + 1, previous_row[i] + 1);
replace = (word[i-1] == ch) ? previous_row[i-1] : (previous_row[i-1] + 1);
current_row[i] = min(insert_or_del, replace);
}
if ((current_row[sz-1] <= max_cost) && (p_tree->word != "")) {
results[p_tree->word] = current_row[sz-1];
}
for(auto& it: current_row){
if (it < min_row)
min_row = it;
}
if(min_row <= max_cost){
for(auto& it: p_tree->children){
search_recursive(it.second, it.first, word, current_row, max_cost, results);
}
}
}
int search(string word)
{
unordered_map <string, int> results;
int sz = word.size();
vector<int> current_row(sz + 1);
for (int i = 0; i <= sz; ++i){
current_row[i] = i;
}
for(auto& it: tree.children){
search_recursive(it.second, it.first, word, current_row, max_cost, results);
}
for(auto& p:results)
cout << p.first << ", " << p.second << endl;
return 0;
}
};
// The cost and a distance for vector initialization
extern "C" {
AutoCorrect* AutoCorrect_new(){ return new AutoCorrect(); }
void AutoCorrect_insert(AutoCorrect* autocorrect, char* word){ autocorrect->insert(word); }
void AutoCorrect_search(AutoCorrect* autocorrect, string input_word){ autocorrect->search(input_word); }
void AutoCorrect_test(AutoCorrect* autocorrect, char* name){ autocorrect-> test(name); }
}
Python main.py:
from ctypes import cdll
lib = cdll.LoadLibrary('autocorrect.so')
class AutoCorrect(object):
def __init__(self):
self.obj = lib.AutoCorrect_new()
def insert(self, word):
lib.AutoCorrect_insert(self.obj,word)
def search(self,input_word):
lib.AutoCorrect_search(self.obj,input_word)
def test(self,test):
lib.AutoCorrect_test(self.obj,test)
if __name__ == "__main__":
import json
WordCount = 0
autocorrect = AutoCorrect()
data_dir = "some_txt_file.txt"
autocorrect.insert(bytes(str(data_dir), encoding='utf8'))
Looking at the line you specified, i think there is an instance you are trying to add a value to a non existent key:
if(children.count(remaining_w.at(0)) == 0){
children[remaining_w.at(0)] = new trie();
if "children.count" returns 0 then that character is not present, then trying to add a value on the second line there means....
what i think you want to do on that line is:
if(children.count(remaining_w.at(0)) == 1){
children[remaining_w.at(0)] = new trie();
meaning you add the value only if key is present.
Ok so I am trying to run a program that uses C++ and Python. The C++ code calls functions from the Python code. The code in C++ that is above main() was already provided and is not supposed to be edited. However, I keep getting the following error: Unhandled exception thrown: write access violation.
pValue was nullptr. When running the code with options 1 & 2. How do I fix this?
C++ Code
#include <Python.h>
#include <iostream>
#include <Windows.h>
#include <cmath>
#include <string>
#include <string.h>
using namespace std;
void CallProcedure(string pName)
{
char* procname = new char[pName.length() + 1];
std::strcpy(procname, pName.c_str());
Py_Initialize();
PyObject* my_module = PyImport_ImportModule("PythonCode");
PyErr_Print();
PyObject* my_function = PyObject_GetAttrString(my_module, procname);
PyObject* my_result = PyObject_CallObject(my_function, NULL);
Py_Finalize();
delete[] procname;
}
int callIntFunc(string proc, string param)
{
char* procname = new char[proc.length() + 1];
std::strcpy(procname, proc.c_str());
char* paramval = new char[param.length() + 1];
std::strcpy(paramval, param.c_str());
PyObject* pName, * pModule, * pDict, * pFunc, * pValue = nullptr, * presult = nullptr;
// Initialize the Python Interpreter
Py_Initialize();
// Build the name object
pName = PyUnicode_FromString((char*)"PythonCode");
// Load the module object
pModule = PyImport_Import(pName);
// pDict is a borrowed reference
pDict = PyModule_GetDict(pModule);
// pFunc is also a borrowed reference
pFunc = PyDict_GetItemString(pDict, procname);
if (PyCallable_Check(pFunc))
{
pValue = Py_BuildValue("(z)", paramval);
PyErr_Print();
presult = PyObject_CallObject(pFunc, pValue);
PyErr_Print();
}
else
{
PyErr_Print();
}
//printf("Result is %d\n", _PyLong_AsInt(presult));
Py_DECREF(pValue);
// Clean up
Py_DECREF(pModule);
Py_DECREF(pName);
// Finish the Python Interpreter
Py_Finalize();
// clean
delete[] procname;
delete[] paramval;
return _PyLong_AsInt(presult);
}
int callIntFunc(string proc, int param)
{
char* procname = new char[proc.length() + 1];
std::strcpy(procname, proc.c_str());
PyObject* pName, * pModule, * pDict, * pFunc, * pValue = nullptr, * presult = nullptr;
// Initialize the Python Interpreter
Py_Initialize();
// Build the name object
pName = PyUnicode_FromString((char*)"PythonCode");
// Load the module object
pModule = PyImport_Import(pName);
// pDict is a borrowed reference
pDict = PyModule_GetDict(pModule);
// pFunc is also a borrowed reference
pFunc = PyDict_GetItemString(pDict, procname);
if (PyCallable_Check(pFunc))
{
pValue = Py_BuildValue("(i)", param);
PyErr_Print();
presult = PyObject_CallObject(pFunc, pValue);
PyErr_Print();
}
else
{
PyErr_Print();
}
//printf("Result is %d\n", _PyLong_AsInt(presult));
Py_DECREF(pValue);
// Clean up
Py_DECREF(pModule);
Py_DECREF(pName);
// Finish the Python Interpreter
Py_Finalize();
// clean
delete[] procname;
return _PyLong_AsInt(presult);
}
void main()
{
int clicked = 0;
do {
int n = 0;
cout << "1: Display a Multiplication Table" << endl << "2: Double a Value" << endl << "3:
Exit" << endl;
cin >> clicked;
switch (clicked) {
case 1:
{ cout << "Please enter a numerical value . . ."<< endl;
cin >> n;
cout << callIntFunc("(MultiplicationTable)",n); }
break;
case 2:
{cout << "Please enter a numerical value . . ."<< endl;
cin >> n;
int dValue = callIntFunc("(DoubleValue)",n);
cout << "Doubled Value: " << dValue << endl; }
break;
case 3:
cout << "You are now exiting the program!" << endl;
break;
default:
cout << "Please enter a valid selection . . ."<< endl;
break;
}
} while (clicked != 3);
}
Python code
import re
import string
def printsomething():
print("Hello from python!")
def PrintMe(v):
print("You sent me: " + v)
return 100
def MultiplicationTable(v):
v = int(v)
for i in range(1, 11):
print(v, " x ", i, " = ", (v * i))
def DoubleValue(v):
return v * 2
def SquareValue(v):
return v * v
Just modify the python function to return 0 as follows;
def MultiplicationTable(v):
v = int(v)
for i in range(1, 11):
print(v, " x ", i, " = ", (v * i))
return 0
I created .pyd file:
#include <pybind11/pybind11.h>
#include <iostream>
#include <typeinfo>
namespace py = pybind11;
int add(int num) {
float a = 1.0;
for (int i = 0; i <= num; i = i + 1) {
a = (a + i)/a;
}
std::cout << "dll is typing: " << a << '\n';
std::cout << typeid(a).name() << std::endl;
return a;
}
PYBIND11_MODULE(py_dll, m) {
m.doc() = "pybind11 py_dll plugin"; // optional module docstring
m.def("add", &add, "Add function", py::arg("num"));
}
I call it from python:
import py_dll
num = 500
a = py_dll.add(num)
print ('python is typing: ', a)
It prints:
Why digit became int? I speak about 22. I expect it to be float 22.8722
This function
int add(int num)
takes an int as parameter and returns an int. The "problem" is not with pybind. Try this to see:
int main() {
auto x = add(42);
std::cout << "return value: " << x << '\n';
std::cout << typeid(x).name() << std::endl;
}
If the function should return a float you have to declare that it returns a float:
float add(int num)
I followed the tutorial on boost::python::numpy, found that numpy's ndarray and array could be shared inside C ++ code, and I found that using the Boost python example, I could call a python function in C ++ with arguments and return.
My goal is that boost python and python exchange numpy array values.
First, I tried to pass the numpy array to the python code with boost python. However, I only found a way to set the pylist to PyList_SET_ITEM by creating a pylist instead of a numpy array.
In C++
//https://docs.python.org/2.0/ext/buildValue.html
PyObject *Convert_Big_Array(long arr[], int length) {
PyObject *pylist, *item;
pylist = PyList_New(length);
if (pylist != NULL)
for (int i = 0; i < length; i++) {
item = PyLong_FromLong(arr[i]);
PyList_SET_ITEM(pylist, i, item);
}
return pylist;
}
int main() {
long arr[5] = { 4,3,2,6,10 };
// Python 3.x Version
Py_SetPythonHome(L"C:\\Users\\User\\Anaconda3");
PyObject *pName, *pModule, *pDict, *pFunc, *pValue, *presult;
Py_Initialize();
return 0;
}
pDict = PyModule_GetDict(pModule);
pFunc = PyDict_GetItemString(pDict, (char*)"someFunction");
if (PyCallable_Check(pFunc)) {
pValue = Py_BuildValue("(O)", Convert_Big_Array(arr, 5));
PyErr_Print();
presult = PyObject_CallObject(pFunc, pValue);
PyErr_Print();
}
else {
PyErr_Print();
return 0;
}
boost::python::handle<> handle(presult);
std::cout << std::endl << "Python ndarray :" << p::extract<char const *>(p::str(handle)) << std::endl;
Py_DECREF(pValue);
Py_DECREF(pModule);
Py_DECREF(pName);
Py_Finalize();
return 0;
}
In Python
import numpy as np
def someFunction(text):
print(text)
return np.array([1,2,3])
With this code I find it very difficult to pass a very large C int array to Python. Is there a more efficient way?
First, if I can convert a C ++ array to ndarray using np :: from_data and then convert it to PyObject, I think I can pass this object itself to python.
Second, I want to convert PyObject (presult) created with PyObject_CallObject to np :: ndarray format. Now the code is just an example and the output is successful.
In other words, do you know how to convert ndarray (C ++) -> PyObject (C ++), PyObject (numpy c ++) -> ndarray (c ++)?
I got a answer, and post Here for others...
thank you.
//#include <Python.h>
#include <stdlib.h>
#include <boost/python/numpy.hpp>
#include <boost/python.hpp>
#include <iostream>
//
////#define BOOST_PYTHON_STATIC_LIB
//
using namespace boost::python;
namespace np = boost::python::numpy;
//https://stackoverflow.com/questions/10701514/how-to-return-numpy-array-from-boostpython/14232897#14232897
np::ndarray mywrapper() {
std::vector<short> v;
v.push_back(3);
v.push_back(5);
Py_intptr_t shape[1] = { v.size() };
np::ndarray result = np::zeros(1, shape, np::dtype::get_builtin<short>());
std::copy(v.begin(), v.end(), reinterpret_cast<short*>(result.get_data()));
//std::cout <<"C++ Memory Addr : " << std::dec << &result << std::endl;
return result;
}
//https://stackoverflow.com/questions/54904448/boost-python-nullptr-while-extracting-ndarray
int main() {
double t_end = 7;
long arr[5] = { 4,3,2,6,10 };
// Python 3.x Version
Py_SetPythonHome(L"C:\\Users\\YangwooKim\\Anaconda3");
//PyObject *pName, *pModule, *pDict, *pFunc, *pValue, *presult;
Py_Initialize();
np::initialize();
object module = import("__main__");
object name_space = module.attr("__dict__");
exec_file("arbName.py", name_space, name_space);
object MyFunc = name_space["someFunction"];
object result;
//for(int i=0; i<1000000; i++)
result = MyFunc(mywrapper());
//printf("Result is %d\n", PyLong_AsLong(presult));
//np::ndarray py_array = np::from_object(boost::python::object(handle));
//auto k = extract<np::ndarray>();
//np::ndarray k = np::from_object(object);
//np::ndarray k = p::extract<np::ndarray>(object);
//const np::ndarray& ret = k();
auto result_array = extract<numpy::ndarray>(result);
const numpy::ndarray& ret = result_array();
int input_size = ret.shape(0);
short* input_ptr = reinterpret_cast<short*>(ret.get_data());
//std::cout << std::endl
// << "Python ndarray :" << p::extract<char const *>(p::str(object)) << std::endl;
std::cout << std::endl << "Python ndarray :" << input_size << std::endl;
for (int i = 0; i < input_size; ++i)
std::cout <<" " <<*(input_ptr + i) <<std::endl;
//Py_Finalize();
//Py_Finalize();
return 0;
}
I've followed all of the basic steps trying to get a python module loaded in c++, however it seems that when I try to get the dictionary of items in the script, it ignores my functions and globals that I wanted to use inside. when I iterate through the items, all I get are the builtins, file, package, path, name, and doc attributes of the script, and nothing else. I checked __name__ and it is coming up correctly ("test.py" is my py file's name and it returns "test" just fine). When I actually try to load my functions or globals (test, qa), the PyDict_GetItemString function returns NULL. What have I done wrong such that in tutorials this works fine, but in my test application, it doesn't work?
here is my Py script, maybe I've forgotten to do something that would allow my items to be seen?
qa = "hello test"
def test(a):
q = "hello world, I am " + a
#print q
return q
here is my C++ code as well, maybe I've forgotten something here?
#include <iostream>
#include <Python.h>
int main() {
Py_Initialize();
PyObject
*pName,
*pModule,
*pDict,
*pFunc,
*pArgs,
*pValue;
// get filename as a pystring
pName = PyString_FromString("test");
std::cout << std::endl << pName;
// Import module from filename
pModule = PyImport_Import(pName);
std::cout << std::endl << pModule;
// build the module's dict
pDict = PyModule_GetDict(pModule);
std::cout << std::endl << pDict << " " << PyDict_Size(pDict);
PyObject* keys = PyDict_Keys(pDict);
int s = PyList_Size(keys);
for (int i = 0; i < s; ++i) {
PyObject* item = PyList_GetItem(keys, i);
printf("\n");
printf(PyString_AsString(item));
}
PyObject* testvar = PyDict_GetItemString(pDict, "qa");
printf(PyString_AsString(testvar));
// get a function from the dict
pFunc = PyDict_GetItemString(pDict, "test");
std::cout << std::endl << pFunc;
// build the arg tuple
pArgs = PyTuple_New(1);
// create an argument
pValue = PyString_FromString("cee programme");
// set an argument
PyTuple_SetItem(pArgs, 0, pValue);
// call the function with the func and the args
PyObject* pResult = PyObject_CallObject(pFunc, pArgs);
// error checking
if (pResult == NULL) {
printf("\nis broek");
}
char* res = PyString_AsString(pResult);
// "destroy the interpreter"
Py_Finalize();
printf(res);
return 0;
}