-
Notifications
You must be signed in to change notification settings - Fork 371
/
setup.py
51 lines (38 loc) · 1.62 KB
/
setup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
"""
pdftabextract setuptools-based setup module.
"""
from setuptools import setup
# Register the pdftabextract distribution: descriptive metadata, the single
# package to ship, and its runtime / optional dependencies.
setup(
    name='pdftabextract',
    version='0.4.0-dev',
    description='A set of tools for data mining (OCR-processed) PDFs',
    # The line breaks inside this literal are part of the published text, so
    # the continuation lines intentionally start at column 0.
    long_description="""This repository contains a set of tools written in Python 3 with the aim to extract tabular
data from scanned and OCR-processed documents available as PDF files. Before these files can be processed they need
to be converted to XML files in pdf2xml format using poppler utils. Further information and examples can be found
in the github repository.""",
    url='https://github.com/WZBSocialScienceCenter/pdftabextract',
    author='Markus Konrad',
    author_email='[email protected]',
    license='Apache 2.0',
    # Trove classifiers are informational only; installers do not enforce them.
    classifiers=[
        'Development Status :: 4 - Beta',
        'Intended Audience :: Science/Research',
        'Intended Audience :: Developers',
        'License :: OSI Approved :: Apache Software License',
        'Operating System :: OS Independent',
        'Programming Language :: Python',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: 3.5',
        'Topic :: Scientific/Engineering :: Information Analysis',
        'Topic :: Software Development :: Libraries :: Python Modules',
        'Topic :: Utilities',
    ],
    keywords='datamining ocr pdf tabular data mining extract extraction',
    packages=['pdftabextract'],
    include_package_data=True,
    # The description and classifiers above declare the package Python 3 only.
    # Unlike classifiers, python_requires is enforced by pip, so a Python 2
    # interpreter is refused at install time instead of failing at import.
    # NOTE(review): '>=3' is the loosest bound the metadata supports; tighten
    # to '>=3.4' if 3.4 is confirmed as the oldest supported release.
    python_requires='>=3',
    install_requires=['numpy', 'opencv-python', 'scipy'],
    # Optional feature set, installable as `pdftabextract[pandas_dataframes]`.
    extras_require={
        'pandas_dataframes': ['pandas'],
    },
)