Skip to content

Commit

Permalink
Migrate from the main kaldi repo
Browse files Browse the repository at this point in the history
  • Loading branch information
janchorowski committed Jan 7, 2015
1 parent 3f14fa1 commit 9f3fdb2
Show file tree
Hide file tree
Showing 13 changed files with 1,497 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ __pycache__/

# C extensions
*.so
.depend.mk

# Distribution / packaging
.Python
Expand Down Expand Up @@ -52,3 +53,6 @@ docs/_build/

# PyBuilder
target/

# Temp files
*~
14 changes: 14 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Top-level driver: every goal is delegated to the Makefile in $(SRCDIR).
SRCDIR = kaldi-python

# Abort at parse time with a clear message when the Kaldi location is unknown.
ifndef KALDI_ROOT
$(error please set KALDI_ROOT to point to the base of the kaldi installation)
endif

# Neither goal names a real file; declaring both phony keeps a stray file
# called "all" or "clean" from silently disabling the goal.
.PHONY: all clean

# Regenerate the dependency file first, then build the default goal of the
# sub-makefile.  $(MAKE) (not a literal "make") propagates -j / -n flags.
all:
	$(MAKE) -C $(SRCDIR) depend
	$(MAKE) -C $(SRCDIR)

# Remove whatever the sub-makefile's own clean goal removes.
clean:
	$(MAKE) -C $(SRCDIR) clean
23 changes: 23 additions & 0 deletions README
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
This is a set of Python wrappers for Kaldi input-output classes.

For example, it lets you iterate over a Kaldi feature archive from an interactive Python session:

In [1]: import kaldi_io
In [2]: feat_reader = kaldi_io.SequentialBaseFloatMatrixReader('scp:./mfcc/raw_mfcc_test.1.scp')
In [3]: next(feat_reader)
Out[3]:
('FDHC0_SI1559', Shape: (338, 13)
[[ 47.97408295 -21.51651001 -24.72166443 ..., -7.34391451 -5.35192871
1.24314117]
[ 46.00983429 -19.34067917 -20.49114227 ..., -2.23715401 -3.65503502
-1.64697027]
[ 43.06345367 -21.29892731 -15.17295933 ..., -6.0672245 -14.09746265
-9.02336311]
...,
[ 37.66175842 -27.93688965 -10.73719597 ..., -4.36497116 -3.1932559
2.3135519 ]
[ 38.15282059 -30.81328964 -11.75108433 ..., -6.77649689 -3.78556442
2.52763462]
[ 38.64388275 -29.08744812 -9.59657097 ..., -1.66973591 -0.54327661
9.77887821]])

41 changes: 41 additions & 0 deletions kaldi-python/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@

# Declared before anything else so that `all` is the default goal regardless
# of what the included kaldi.mk defines; its prerequisites are attached at
# the bottom of this file.
all:
EXTRA_CXXFLAGS = -Wno-sign-compare

KALDI_SRC = $(KALDI_ROOT)/src

include $(KALDI_SRC)/kaldi.mk

BINFILES =


OBJFILES =


# Static Kaldi archives linked into the extension module.
ADDLIBS = $(KALDI_SRC)/lm/kaldi-lm.a $(KALDI_SRC)/decoder/kaldi-decoder.a $(KALDI_SRC)/lat/kaldi-lat.a \
	$(KALDI_SRC)/hmm/kaldi-hmm.a $(KALDI_SRC)/transform/kaldi-transform.a $(KALDI_SRC)/gmm/kaldi-gmm.a \
	$(KALDI_SRC)/tree/kaldi-tree.a $(KALDI_SRC)/matrix/kaldi-matrix.a $(KALDI_SRC)/util/kaldi-util.a \
	$(KALDI_SRC)/base/kaldi-base.a $(KALDI_SRC)/thread/kaldi-thread.a

TESTFILES =

# Kept as recursive (=) variables on purpose: the shell commands only run when
# a recipe actually expands them, so `make clean` works without Python/numpy.
PYLIB = $(shell python-config --libs)
PYINC = $(shell python-config --includes)
# print(...) with parentheses is accepted by both Python 2 and Python 3.
NPINC = -I$(shell python -c 'import numpy; print(numpy.get_include())')

PYLIBS = kaldi_io_internal.so

# Kaldi's makefiles/default_rules.mk is not included; the rules this file
# needs are defined explicitly below.

# None of these goals names a file produced by its recipe.
.PHONY: all clean depend test

# Build a Python extension module directly from a single C++ source.
# $(PYLIB) already holds the complete "-l..." flags reported by python-config,
# so it is passed as-is (prefixing it with -L would create a bogus search
# path) and supplies the libpython matching the headers in $(PYINC).
%.so: %.cpp
	$(CXX) -shared -o $@ -Wall -fPIC -I$(KALDI_SRC) $(PYINC) $(NPINC) $(CXXFLAGS) $< $(ADDLIBS) $(LDFLAGS) $(LOADLIBES) $(LDLIBS) -lboost_python -lboost_system $(PYLIB)

# Remove build products, including the generated dependency file.
clean:
	$(RM) *.o *.a *.so $(TESTFILES) $(BINFILES) $(TESTOUTPUTS) tmp* *.tmp .depend.mk

# Write header dependencies of every .cpp file to .depend.mk.  The leading
# "-" makes a failure here non-fatal for the rest of the build.
depend:
	-$(CXX) -I$(KALDI_SRC) $(PYINC) $(NPINC) -M $(CXXFLAGS) *.cpp > .depend.mk

# No tests are defined for this directory.
test:

all: $(PYLIBS)
175 changes: 175 additions & 0 deletions kaldi-python/bp_converters.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
/*
* bp_converters.h
*
* Created on: Aug 28, 2014
* Author: chorows
*/

#ifndef BP_CONVERTERS_H_
#define BP_CONVERTERS_H_

#include <vector>

#include <boost/shared_ptr.hpp>
#include <boost/static_assert.hpp>

#include <boost/python.hpp>
#include <boost/python/operators.hpp>
#include <boost/python/stl_iterator.hpp>



namespace kaldi {
//
// Code adapted from http://code.activestate.com/lists/python-cplusplus-sig/16463/ and
// http://misspent.wordpress.com/2009/09/27/how-to-write-boost-python-converters/
//
// To-Python converter: copies the elements of a std::vector<T>, in order,
// into a freshly created Python list.
template<typename T>
struct VectorToListBPConverter {

  // Returns a pointer to the new list with its reference count incremented,
  // so the object survives the destruction of the local wrapper.
  static PyObject* convert(std::vector<T> const& vec) {
    typedef typename std::vector<T>::const_iterator ConstIter;

    boost::python::list result;
    for (ConstIter it = vec.begin(); it != vec.end(); ++it) {
      result.append(*it);
    }

    PyObject* raw = result.ptr();
    return boost::python::incref(raw);
  }
};

template<typename T>
struct VectorFromListBPConverter {
VectorFromListBPConverter() {
using namespace boost::python;
using namespace boost::python::converter;
boost::python::converter::registry::push_back(
&VectorFromListBPConverter<T>::convertible,
&VectorFromListBPConverter<T>::construct, type_id<std::vector<T> >());
}

// Determine if obj_ptr can be converted in a std::vector<T>
static void* convertible(PyObject* obj_ptr) {
// if (!PyIter_Check(obj_ptr)) {
// return 0;
// }
return obj_ptr;
}

// Convert obj_ptr into a std::vector<T>
static void construct(
PyObject* obj_ptr,
boost::python::converter::rvalue_from_python_stage1_data* data) {

boost::python::object o = boost::python::object(boost::python::handle<>(boost::python::borrowed(obj_ptr)));
boost::python::stl_input_iterator<T> begin(o);
boost::python::stl_input_iterator<T> end;

// Grab pointer to memory into which to construct the new std::vector<T>
void* storage = ((boost::python::converter::rvalue_from_python_storage<
std::vector<T> >*) data)->storage.bytes;

// in-place construct the new std::vector<T> using the character data
// extraced from the python object
std::vector<T>& v = *(new (storage) std::vector<T>());

v.insert(v.end(), begin, end);

// Stash the memory chunk pointer for later use by boost.python
data->convertible = storage;
}
};

// From-Python converter: builds a map-like container M (anything providing
// key_type, mapped_type, operator[] and swap) from a Python dict.
// Instantiating the struct registers the converter with Boost.Python.
template<typename M>
struct MapFromDictBPConverter {
  MapFromDictBPConverter() {
    boost::python::converter::registry::push_back(
        &MapFromDictBPConverter<M>::convertible,
        &MapFromDictBPConverter<M>::construct, boost::python::type_id<M>());
  }

  // Determine whether obj_ptr can be converted into an M: only dict
  // objects are accepted.
  static void* convertible(PyObject* obj_ptr) {
    if (!PyDict_Check(obj_ptr)) {
      return 0;
    }
    return obj_ptr;
  }

  // Convert obj_ptr into an M placed in the storage supplied by
  // Boost.Python.  Key/value conversion errors propagate as exceptions.
  static void construct(
      PyObject* obj_ptr,
      boost::python::converter::rvalue_from_python_stage1_data* data) {

    boost::python::dict obj(boost::python::handle<>(boost::python::borrowed(obj_ptr)));
    boost::python::list keys = obj.keys();

    // Grab pointer to memory into which to construct the new M
    void* storage = ((boost::python::converter::rvalue_from_python_storage< M >*) data)->storage.bytes;

    // Fill a local container first: if an extraction throws, the local is
    // destroyed normally and nothing is leaked in the raw storage.
    M filled;

    boost::python::stl_input_iterator<typename M::key_type> begin(keys);
    boost::python::stl_input_iterator<typename M::key_type> end;

    for (; begin != end; ++begin) {
      const typename M::key_type& k = *begin;
      const typename M::mapped_type& v = boost::python::extract<typename M::mapped_type>(obj[k]);
      filled[k] = v;
    }

    // Only now construct the result in place and take over the contents.
    M* result = new (storage) M();
    result->swap(filled);

    // Stash the memory chunk pointer for later use by boost.python
    data->convertible = storage;
  }
};


// To-Python converter: turns a std::pair<T1,T2> into a two-element Python
// tuple (first, second).
template<typename T1, typename T2>
struct PairToTupleBPConverter {

  // Returns a pointer to the new tuple with its reference count incremented,
  // so the object survives the destruction of the local wrapper.
  static PyObject* convert(std::pair<T1,T2> const& p) {
    boost::python::tuple packed = boost::python::make_tuple(p.first, p.second);
    PyObject* raw = packed.ptr();
    return boost::python::incref(raw);
  }
};

// From-Python converter: builds a std::pair<T1,T2> from a Python tuple of
// length two.  Instantiating the struct registers the converter with
// Boost.Python.
template<typename T1, typename T2>
struct PairFromTupleBPConverter {
  PairFromTupleBPConverter() {
    boost::python::converter::registry::push_back(
        &PairFromTupleBPConverter<T1, T2>::convertible,
        &PairFromTupleBPConverter<T1, T2>::construct, boost::python::type_id<std::pair<T1,T2> >());
  }

  // Determine whether obj_ptr can be converted into a std::pair<T1,T2>:
  // it must be a tuple with exactly two elements.
  static void* convertible(PyObject* obj_ptr) {
    if (!PyTuple_Check(obj_ptr) || PySequence_Length(obj_ptr)!=2) {
      return 0;
    }
    return obj_ptr;
  }

  // Convert obj_ptr into a std::pair<T1,T2> placed in the storage supplied
  // by Boost.Python.  Element conversion errors propagate as exceptions.
  static void construct(
      PyObject* obj_ptr,
      boost::python::converter::rvalue_from_python_stage1_data* data) {

    boost::python::tuple t = boost::python::tuple(boost::python::handle<>(boost::python::borrowed(obj_ptr)));

    // Grab pointer to memory into which to construct the new pair
    void* storage = ((boost::python::converter::rvalue_from_python_storage<
        std::pair<T1,T2> >*) data)->storage.bytes;

    // Extract both members into locals before touching the raw storage: if
    // either extraction throws, nothing has been constructed there yet and
    // nothing is leaked.
    T1 first = boost::python::extract<T1>(t[0]);
    T2 second = boost::python::extract<T2>(t[1]);

    new (storage) std::pair<T1,T2>(first, second);

    // Stash the memory chunk pointer for later use by boost.python
    data->convertible = storage;
  }
};


}

#endif /* BP_CONVERTERS_H_ */
78 changes: 78 additions & 0 deletions kaldi-python/kaldi_argparse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
'''
Created on Aug 14, 2014
@author: chorows
'''

import os
import sys
import argparse

#import __main__

class AddConfig(argparse.Action):
    """argparse action that reads further options from a configuration file.

    The option's value is the path of a text file holding one option per
    line.  Everything after a ``#`` on a line is a comment.  The options
    found are parsed by the same parser into the same namespace.
    """

    def __init__(self, *args, **kwargs):
        argparse.Action.__init__(self, *args, **kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
        """Parse the options listed in the file named by ``values``.

        Raises whatever ``open`` raises when the file cannot be read.
        """
        with open(values, 'r') as f:
            stripped = (line.split('#')[0].strip() for line in f)
            # Blank and comment-only lines would otherwise reach argparse as
            # empty-string arguments and be rejected as stray positionals.
            opts = [opt for opt in stripped if opt]
        parser.parse_args(args=opts, namespace=namespace)

class KaldiArgumentParser(argparse.ArgumentParser):
    """ArgumentParser variant with ``--config`` file expansion.

    Differences from the stock parser visible in this class:

    * the automatic ``-h/--help`` option is disabled; help, version,
      ``--print-args`` and ``--config`` are added on demand by
      :meth:`add_standard_arguments`;
    * ``--config FILE`` / ``--config=FILE`` is replaced, before parsing, by
      the options listed in FILE (one per line, ``#`` starts a comment);
    * after parsing, the command line is echoed to stderr when the
      ``print_args`` attribute of the result is true.
    """

    _TRUE_WORDS = ('true', 't', '1', 'yes', 'y')
    _FALSE_WORDS = ('false', 'f', '0', 'no', 'n', '')

    def __init__(self, *args, **kwargs):
        """Accept the ArgumentParser arguments plus an optional ``version``."""
        kwargs['add_help'] = False
        version = kwargs.pop('version', None)
        super(KaldiArgumentParser, self).__init__(*args, **kwargs)
        self.version = version

    @staticmethod
    def _parse_bool(text):
        """Turn a command-line word into a bool.

        ``type=bool`` cannot be used for this: ``bool('false')`` is True.
        Raises argparse.ArgumentTypeError for unrecognised words.
        """
        word = text.strip().lower()
        if word in KaldiArgumentParser._TRUE_WORDS:
            return True
        if word in KaldiArgumentParser._FALSE_WORDS:
            return False
        raise argparse.ArgumentTypeError('expected true or false, got %r' % (text,))

    @staticmethod
    def _read_config(path):
        """Return the options in config file ``path``; [] for an empty path.

        Comments are stripped, and blank or comment-only lines are dropped
        so they do not reach argparse as empty-string arguments.
        """
        if not path:
            return []
        with open(path, 'r') as f:
            stripped = (line.split('#')[0].strip() for line in f)
            return [opt for opt in stripped if opt]

    def add_standard_arguments(self):
        """Add the 'Standard options' group: help, version (only when a
        version was given), ``--print-args`` and ``--config``."""
        grp = self.add_argument_group('Standard options')

        default_prefix = '-'
        grp.add_argument(
            default_prefix+'h', default_prefix*2+'help',
            action='help', default=argparse.SUPPRESS,
            help=argparse._('show this help message and exit'))
        if self.version:
            grp.add_argument(
                default_prefix+'v', default_prefix*2+'version',
                action='version', default=argparse.SUPPRESS,
                version=self.version,
                help=argparse._("show program's version number and exit"))
        grp.add_argument('--print-args', type=self._parse_bool, default=True,
                         help='Print the command line arguments (to stderr)')
        # Registered for the help text; the option itself is consumed by
        # parse_known_args before argparse sees it.
        grp.add_argument('--config', default=argparse.SUPPRESS,
                         help='Configuration file with options')

    def parse_known_args(self, args=None, namespace=None):
        """Expand ``--config`` options, then defer to ArgumentParser.

        Only the exact spellings ``--config FILE`` and ``--config=FILE`` are
        expanded; other options that merely start with ``--config`` are
        passed through untouched.
        """
        if args is None:
            args = sys.argv[1:]
        expanded_args = []
        config_prefix = '--config='
        expect_path = False

        for arg in args:
            if expect_path:
                expanded_args.extend(self._read_config(arg))
                expect_path = False
            elif arg == '--config':
                expect_path = True
            elif arg.startswith(config_prefix):
                expanded_args.extend(self._read_config(arg[len(config_prefix):].strip()))
            else:
                expanded_args.append(arg)

        if expect_path:
            # A trailing --config without a file name: hand it to argparse
            # so the user gets an error instead of it being silently dropped.
            expanded_args.append('--config')
        return argparse.ArgumentParser.parse_known_args(self, args=expanded_args, namespace=namespace)

    def parse_args(self, args=None, namespace=None):
        """Parse the arguments and optionally echo the command line to stderr."""
        args = argparse.ArgumentParser.parse_args(self, args=args, namespace=namespace)
        # print_args only exists when add_standard_arguments() was called.
        if getattr(args, 'print_args', False):
            sys.stderr.write('%s %s\n' % (os.path.basename(sys.argv[0]), " ".join(sys.argv[1:])))
        return args
Loading

0 comments on commit 9f3fdb2

Please sign in to comment.