Skip to content

Commit

Permalink
Merge pull request deepset-ai#3 from deepset-ai/dep-groups
Browse files Browse the repository at this point in the history
speech2text: reduce to Whisper
  • Loading branch information
ZanSara committed Mar 2, 2023
2 parents 78b70e3 + 2f2b4b1 commit a9e9a46
Show file tree
Hide file tree
Showing 25 changed files with 457 additions and 474 deletions.
2 changes: 1 addition & 1 deletion .github/actions/python_cache/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ inputs:
pythonVersion:
description: 'Python version to use'
required: true
default: "3.7"
default: "3.8"
package:
description: 'Package to install'
required: true
Expand Down
63 changes: 39 additions & 24 deletions .github/utils/generate_json_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,13 @@

try:
from importlib import metadata
except ImportError: # for Python<3.8
except ImportError: # for Python<3.8
import importlib_metadata as metadata

from haystack.nodes._json_schema import find_subclasses_in_modules, create_schema_for_node_class
from haystack.nodes._json_schema import (
find_subclasses_in_modules,
create_schema_for_node_class,
)


logging.basicConfig(level=logging.INFO)
Expand All @@ -27,13 +30,17 @@
BRANCH_NAME = "text2speech" # FIXME should be main after merge


def get_package_json_schema(title: str, description: str, module_name: str, schema_ref: str):
def get_package_json_schema(
title: str, description: str, module_names: List[str], schema_ref: str
):
"""
Generate JSON schema for the custom node(s).
"""
# List all known nodes in the given modules
importlib.import_module(module_name)
possible_node_classes = find_subclasses_in_modules(importable_modules=[module_name])
possible_node_classes = []
for module_name in module_names:
importlib.import_module(module_name)
possible_node_classes += find_subclasses_in_modules(importable_modules=[module_name])

# Build the definitions and refs for the nodes
schema_definitions = []
Expand All @@ -58,7 +65,7 @@ def update_json_schema(
title: str,
description: str,
package_name: str,
module_name: str,
module_names: str,
):
"""
If the version contains "rc", only update main's schema.
Expand All @@ -71,8 +78,8 @@ def update_json_schema(
package_schema = get_package_json_schema(
title=title,
description=description,
module_name=module_name,
schema_ref=base_schema_ref + main_filename
module_names=module_names,
schema_ref=base_schema_ref + main_filename,
)

# Update main's schema
Expand Down Expand Up @@ -127,39 +134,47 @@ def generate_schema_index(
"title": title,
"description": description,
"type": "object",
"oneOf": []
"oneOf": [],
}
with open(destination_path / index_name, "w") as json_file:
json.dump(index, json_file, indent=2)



def get_package_data(folder: str):
    """
    Collect the package metadata needed to generate the JSON schemas of a node package.

    The distribution is looked up under the name ``haystack-<folder>``.

    :param folder: name of the package folder, e.g. ``hello-world-node``.
    :return: a dict with the keys ``package_name``, ``version``, ``title``,
        ``description`` and ``destination_path``.
    :raises PackageNotFoundError: (from ``importlib.metadata``) if no distribution
        called ``haystack-<folder>`` is installed.
    """
    package_name = "haystack-" + folder
    meta = metadata.metadata(package_name)

    # The schemas folder is resolved relative to the invoked script (sys.argv[0]):
    # three directory levels up, then nodes/<folder>/json-schemas.
    destination_path = (
        Path(sys.argv[0]).parent.parent.parent / "nodes" / folder / "json-schemas"
    ).absolute()

    return {
        "package_name": package_name,
        "version": metadata.version(package_name),
        # Human-readable title: dashes and underscores of the distribution name become spaces.
        "title": str(meta["name"]).replace("-", " ").replace("_", " "),
        "description": meta["summary"],
        "destination_path": destination_path,
    }



if __name__ == "__main__":

    import argparse

    # Command-line entry point: generates the schemas for one node package.
    parser = argparse.ArgumentParser(
        description="JSON Schema generator for Haystack custom node packages"
    )
    parser.add_argument(
        "-f",
        "--folder-name",
        dest="folder",
        help="Name of the folder, i.e. hello-world-node",
        required=True,
    )
    parser.add_argument(
        "-m",
        "--modules",
        dest="module_names",
        help="Comma-separated list of the modules to scan for nodes, "
        "i.e. hello_world_node or hello_world_node.greeter,hello_world_node.waver",
        required=True,
    )
    params = vars(parser.parse_args())

    # Tolerate whitespace around the commas and stray/trailing commas: an empty
    # or padded entry would otherwise be handed to importlib.import_module as is.
    module_names = [
        name.strip() for name in params["module_names"].split(",") if name.strip()
    ]
    if not module_names:
        parser.error("-m/--modules must name at least one module")

    package_data = get_package_data(folder=params["folder"])

    update_json_schema(**package_data, module_names=module_names)
150 changes: 72 additions & 78 deletions .github/workflows/test__speech2text.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,56 @@ on:

env:
PACKAGE: nodes/speech2text/
PYTEST_PARAMS: --maxfail=5 --durations=10
PYTEST_PARAMS: --maxfail=5 --durations=10 --suppress-no-test-exit-code

jobs:

yaml-schema:
  # Regenerates the JSON schemas of the package and fails when the regenerated
  # files differ from what is committed (i.e. `git status --porcelain` is not empty).
  runs-on: ubuntu-latest
  steps:

    - uses: actions/checkout@v2

    - name: Setup Python
      uses: ./.github/actions/python_cache/
      with:
        package: ${{ env.PACKAGE }}

    - name: Install audio libraries
      run: |
        sudo apt-get update
        sudo apt-get install libsndfile1 ffmpeg

    - name: Install package
      run: |
        pip install ${{ env.PACKAGE }}[dev]
        pip install git+https://github.com/openai/whisper.git

    - name: Update pipeline YAML schemas
      run: python .github/utils/generate_json_schema.py -f speech2text -m=speech2text.transcriber

    - name: Check status
      # The command suggested in the failure message must stay identical to the
      # one run by the "Update pipeline YAML schemas" step above.
      run: |
        if [[ `git status --porcelain` ]]; then
          git status
          echo "##################################################################################################"
          git diff
          echo "##################################################################################################"
          echo "# "
          echo "# CHECK FAILED! The YAML schemas for this package were not updated."
          echo "# "
          echo "# Please generate the new schemas locally:"
          echo "# "
          echo "# python .github/utils/generate_json_schema.py -f speech2text -m speech2text.transcriber"
          echo "# "
          echo "# Or see https://github.com/deepset-ai/haystack/blob/main/CONTRIBUTING.md for help."
          echo "# "
          echo "# If you have further problems, please open an issue: https://github.com/deepset-ai/haystack-extras/issues"
          echo "# "
          echo "##################################################################################################"
          exit 1
        fi
mypy:
runs-on: ubuntu-latest
steps:
Expand All @@ -27,19 +73,13 @@ jobs:
package: ${{ env.PACKAGE }}

- name: Install dependencies
run: pip install ${{ env.PACKAGE }}[dev]
run: |
pip install ${{ env.PACKAGE }}[dev]
pip install git+https://github.com/openai/whisper.git
- name: Mypy
# NOTE: https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-library-stubs-or-py-typed-marker
# FIXME --install-types does not work properly yet, see https://github.com/python/mypy/issues/10600
# We should see if there's a better way than --ignore-missing-imports
run: mypy ${{ env.PACKAGE }} --ignore-missing-imports

# - uses: act10ns/slack@v1
# with:
# status: ${{ job.status }}
# channel: '#haystack'
# if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'
# Haystack does not provide type stubs, so --ignore-missing-imports is necessary
run: mypy --install-types --non-interactive ${{ env.PACKAGE }} --exclude=tests/ --exclude=json_schemas/ --ignore-missing-imports

pylint:
runs-on: ubuntu-latest
Expand All @@ -51,17 +91,16 @@ jobs:
with:
package: ${{ env.PACKAGE }}

- name: Install dependencies
run: |
pip install ${{ env.PACKAGE }}[dev]
pip install git+https://github.com/openai/whisper.git
- name: Pylint
run: |
cd ${{ env.PACKAGE }}
pylint -ry -j 0 speech2text/
# - uses: act10ns/slack@v1
# with:
# status: ${{ job.status }}
# channel: '#haystack'
# if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'

unit-tests:
name: Unit / ${{ matrix.os }}
needs:
Expand All @@ -70,8 +109,9 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest] # windows-latest --> No audio deps available for Windows.
os: [ubuntu-latest] # windows-latest --> No audio deps available for Windows.
runs-on: ${{ matrix.os }}

steps:
- uses: actions/checkout@v2

Expand All @@ -80,18 +120,19 @@ jobs:
with:
package: ${{ env.PACKAGE }}

- name: Install audio libraries
run: |
sudo apt-get update
sudo apt-get install libsndfile1 ffmpeg
- name: Install Haystack
run: pip install ${{ env.PACKAGE }}[dev]
run: |
pip install ${{ env.PACKAGE }}[dev]
pip install git+https://github.com/openai/whisper.git
- name: Run
run: pytest ${{ env.PYTEST_PARAMS }} -m "unit" ${{ env.PACKAGE }}

# - uses: act10ns/slack@v1
# with:
# status: ${{ job.status }}
# channel: '#haystack'
# if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'

integration-tests:
name: Integration / ${{ matrix.os }}
needs:
Expand All @@ -100,7 +141,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest] # windows-latest --> No audio deps available for Windows.
os: [ubuntu-latest] # windows-latest --> No audio deps available for Windows.
runs-on: ${{ matrix.os }}

steps:
Expand All @@ -111,65 +152,18 @@ jobs:
with:
package: ${{ env.PACKAGE }}

- name: Install audio libraries
- name: Install Linux audio libraries
run: |
sudo apt-get update
sudo apt-get install libsndfile1 ffmpeg
- name: Install Haystack
run: pip install ${{ env.PACKAGE }}[dev]
run: |
pip install ${{ env.PACKAGE }}[dev]
pip install git+https://github.com/openai/whisper.git
- name: Run tests
env:
TOKENIZERS_PARALLELISM: 'false' # Avoid logspam by tokenizers
run: |
pytest ${{ env.PYTEST_PARAMS }} -m "integration" ${{ env.PACKAGE }}
# - uses: act10ns/slack@v1
# with:
# status: ${{ job.status }}
# channel: '#haystack'
# if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'


yaml-schema:
# Superseded version of the schema check: regenerates the JSON schemas and fails
# the job when the working tree changes as a result.
runs-on: ubuntu-latest
steps:

- uses: actions/checkout@v2

- name: Setup Python
uses: ./.github/actions/python_cache/
with:
package: ${{ env.PACKAGE }}

- name: Install sndfile
run: sudo apt update && sudo apt-get install libsndfile1 ffmpeg

- name: Install package
run: pip install -U ${{ env.PACKAGE }}[dev]

# Regenerate the schemas into <package>/json-schemas so that drift shows up as a git change.
# NOTE(review): the -o/-v/-t/-d/-p options used here are not defined by the newer
# argument parser of generate_json_schema.py (only -f/--folder-name and -m/--modules);
# confirm which revision of the script this invocation targets.
- name: Update pipeline YAML schemas
run: python .github/utils/generate_json_schema.py -o ${{ env.PACKAGE }}/json-schemas -v 0.0.1 -t "speech2text nodes schema" -d "schema for speech2text nodes" -p speech2text -m speech2text

# Print the status, the diff and regeneration instructions, then exit 1, if anything changed.
- name: Check status
run: |
if [[ `git status --porcelain` ]]; then
git status
echo "##################################################################################################"
git diff
echo "##################################################################################################"
echo "# "
echo "# CHECK FAILED! The YAML schemas for this package were not updated."
echo "# "
echo "# Please generate the new schemas locally:"
echo "# "
echo "# python .github/utils/generate_json_schema.py -o ${{ env.PACKAGE }}/json-schemas -v 0.0.1 -t "speech2text nodes schema" -d "schema for speech2text nodes" -p speech2text -m speech2text"
echo "# "
echo "# Or see https://github.com/deepset-ai/haystack/blob/main/CONTRIBUTING.md for help."
echo "# "
echo "# If you have further problems, please open an issue: https://github.com/deepset-ai/haystack-extras/issues"
echo "# "
echo "##################################################################################################"
exit 1
fi
Loading

0 comments on commit a9e9a46

Please sign in to comment.