Merge pull request deepset-ai#3 from deepset-ai/dep-groups

speech2text: reduce to Whisper
jdixosnd · Mar 2, 2023 · a9e9a46 · a9e9a46
2 parents 78b70e3 + 2f2b4b1
commit a9e9a46
Show file tree

Hide file tree

Showing 25 changed files with 457 additions and 474 deletions.
diff --git a/.github/actions/python_cache/action.yml b/.github/actions/python_cache/action.yml
@@ -15,7 +15,7 @@ inputs:
  pythonVersion:
  description: 'Python version to use'
  required: true
- default: "3.7"
+ default: "3.8"
  package:
  description: 'Package to install'
  required: true

diff --git a/.github/utils/generate_json_schema.py b/.github/utils/generate_json_schema.py
@@ -15,10 +15,13 @@
 
 try:
  from importlib import metadata
-except ImportError: # for Python<3.8
+except ImportError:  # for Python<3.8
  import importlib_metadata as metadata
 
-from haystack.nodes._json_schema import find_subclasses_in_modules, create_schema_for_node_class
+from haystack.nodes._json_schema import (
+ find_subclasses_in_modules,
+ create_schema_for_node_class,
+)
 
 
 logging.basicConfig(level=logging.INFO)
@@ -27,13 +30,17 @@
 BRANCH_NAME = "text2speech" # FIXME should be main after merge
 
 
-def get_package_json_schema(title: str, description: str, module_name: str, schema_ref: str):
+def get_package_json_schema(
+ title: str, description: str, module_names: List[str], schema_ref: str
+):
  """
  Generate JSON schema for the custom node(s).
  """
  # List all known nodes in the given modules
- importlib.import_module(module_name)
- possible_node_classes = find_subclasses_in_modules(importable_modules=[module_name])
+ possible_node_classes = []
+ for module_name in module_names:
+ importlib.import_module(module_name)
+ possible_node_classes += find_subclasses_in_modules(importable_modules=[module_name])
 
  # Build the definitions and refs for the nodes
  schema_definitions = []
@@ -58,7 +65,7 @@ def update_json_schema(
  title: str,
  description: str,
  package_name: str,
- module_name: str,
+ module_names: str,
 ):
  """
  If the version contains "rc", only update main's schema.
@@ -71,8 +78,8 @@ def update_json_schema(
  package_schema = get_package_json_schema(
  title=title,
  description=description,
- module_name=module_name,
- schema_ref=base_schema_ref + main_filename
+ module_names=module_names,
+ schema_ref=base_schema_ref + main_filename,
  )
 
  # Update main's schema
@@ -127,39 +134,47 @@ def generate_schema_index(
  "title": title,
  "description": description,
  "type": "object",
- "oneOf": []
+ "oneOf": [],
  }
  with open(destination_path / index_name, "w") as json_file:
  json.dump(index, json_file, indent=2)
 
 
-
 def get_package_data(folder: str):
- package_name = "haystack-"+folder
+ package_name = "haystack-" + folder
  meta = metadata.metadata(package_name)
  return {
  "package_name": package_name,
- "version": metadata.version("haystack-"+folder),
+ "version": metadata.version("haystack-" + folder),
  "title": str(meta["name"]).replace("-", " ").replace("_", " "),
  "description": meta["summary"],
- "destination_path": (Path(sys.argv[0]).parent.parent.parent / "nodes" / folder / "json-schemas").absolute()
+ "destination_path": (
+ Path(sys.argv[0]).parent.parent.parent / "nodes" / folder / "json-schemas"
+ ).absolute(),
  }
 
 
-
 if __name__ == "__main__":
 
  import argparse
 
- parser = argparse.ArgumentParser(description='JSON Schema generator for Haystack custom node packages')
- parser.add_argument('-f','--folder-name', dest="folder", help='Name of the folder, i.e. hello-world-node', required=True)
- parser.add_argument('-m','--module', dest="module_name", help='Name of the module, i.e. hello_world_node', required=True)
- parser.add_argument('-v','--version', dest="version", help='Package version')
- parser.add_argument('-t','--title', dest="title", help='Schema title, i.e. "My Haystack Hello World Node"')
- parser.add_argument('-d','--description', dest="description", help='Schema description, i.e. "JSON schemas for Haystack nodes that can be used to greet the world."')
- parser.add_argument('-o','--output-path', dest="destination_path", help='Path where to save the generated schemas (usually <your package>/json-schemas)')
+ parser = argparse.ArgumentParser(
+ description="JSON Schema generator for Haystack custom node packages"
+ )
+ parser.add_argument(
+ "-f",
+ "--folder-name",
+ dest="folder",
+ help="Name of the folder, i.e. hello-world-node",
+ required=True,
+ )
+ parser.add_argument(
+ "-m",
+ "--modules",
+ dest="module_names",
+ help="Name of the module, i.e. hello_world_node",
+ required=True,
+ )
  params = vars(parser.parse_args())
-
  package_data = get_package_data(folder=params["folder"])
-
- update_json_schema(**package_data, module_name=params["module_name"])
+ update_json_schema(**package_data, module_names=params["module_names"].split(","))
diff --git a/.github/workflows/test__speech2text.yml b/.github/workflows/test__speech2text.yml
@@ -12,10 +12,56 @@ on:
 
 env:
  PACKAGE: nodes/speech2text/
- PYTEST_PARAMS: --maxfail=5 --durations=10
+ PYTEST_PARAMS: --maxfail=5 --durations=10 --suppress-no-test-exit-code
 
 jobs:
 
+ yaml-schema:
+ runs-on: ubuntu-latest
+ steps:
+
+ - uses: actions/checkout@v2
+
+ - name: Setup Python
+ uses: ./.github/actions/python_cache/
+ with:
+ package: ${{ env.PACKAGE }}
+
+ - name: Install audio libraries
+ run: |
+ sudo apt-get update
+ sudo apt-get install libsndfile1 ffmpeg
+
+ - name: Install package
+ run: |
+ pip install ${{ env.PACKAGE }}[dev]
+ pip install git+https://github.com/openai/whisper.git
+
+ - name: Update pipeline YAML schemas
+ run: python .github/utils/generate_json_schema.py -f speech2text -m=speech2text.transcriber
+
+ - name: Check status
+ run: |
+ if [[ `git status --porcelain` ]]; then
+ git status
+ echo "##################################################################################################"
+ git diff
+ echo "##################################################################################################"
+ echo "# "
+ echo "# CHECK FAILED! The YAML schemas for this package were not updated."
+ echo "# "
+ echo "# Please generate the new schemas locally:"
+ echo "# "
+ echo "# python .github/utils/generate_json_schema.py -o ${{ env.PACKAGE }}/json-schemas -v 0.0.1 -t "speech2text nodes schema" -d "schema for speech2text nodes" -p speech2text -m speech2text.transcriber"
+ echo "# "
+ echo "# Or see https://github.com/deepset-ai/haystack/blob/main/CONTRIBUTING.md for help."
+ echo "# "
+ echo "# If you have further problems, please open an issue: https://github.com/deepset-ai/haystack-extras/issues"
+ echo "# "
+ echo "##################################################################################################"
+ exit 1
+ fi
+
  mypy:
  runs-on: ubuntu-latest
  steps:
@@ -27,19 +73,13 @@ jobs:
  package: ${{ env.PACKAGE }}
 
  - name: Install dependencies
- run: pip install ${{ env.PACKAGE }}[dev]
+ run: |
+ pip install ${{ env.PACKAGE }}[dev]
+ pip install git+https://github.com/openai/whisper.git
 
  - name: Mypy
- # NOTE: https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-library-stubs-or-py-typed-marker
- # FIXME --install-types does not work properly yet, see https://github.com/python/mypy/issues/10600
- # We should see if there's a better way than --ignore-missing-imports
- run: mypy ${{ env.PACKAGE }} --ignore-missing-imports
-
- # - uses: act10ns/slack@v1
- # with:
- # status: ${{ job.status }}
- # channel: '#haystack'
- # if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'
+ # Haystack does not provide type stubs, so --ignore-missing-imports is necessary
+ run: mypy --install-types --non-interactive ${{ env.PACKAGE }} --exclude=tests/ --exclude=json_schemas/ --ignore-missing-imports
 
  pylint:
  runs-on: ubuntu-latest
@@ -51,17 +91,16 @@ jobs:
  with:
  package: ${{ env.PACKAGE }}
 
+ - name: Install dependencies
+ run: |
+ pip install ${{ env.PACKAGE }}[dev]
+ pip install git+https://github.com/openai/whisper.git
+
  - name: Pylint
  run: |
  cd ${{ env.PACKAGE }}
  pylint -ry -j 0 speech2text/
 
- # - uses: act10ns/slack@v1
- # with:
- # status: ${{ job.status }}
- # channel: '#haystack'
- # if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'
-
  unit-tests:
  name: Unit / ${{ matrix.os }}
  needs:
@@ -70,8 +109,9 @@ jobs:
  strategy:
  fail-fast: false
  matrix:
- os: [ubuntu-latest, macos-latest] # windows-latest --> No audio deps available for Windows.
+ os: [ubuntu-latest] # windows-latest --> No audio deps available for Windows.
  runs-on: ${{ matrix.os }}
+
  steps:
  - uses: actions/checkout@v2
 
@@ -80,18 +120,19 @@ jobs:
  with:
  package: ${{ env.PACKAGE }}
 
+ - name: Install audio libraries
+ run: |
+ sudo apt-get update
+ sudo apt-get install libsndfile1 ffmpeg
+
  - name: Install Haystack
- run: pip install ${{ env.PACKAGE }}[dev]
+ run: |
+ pip install ${{ env.PACKAGE }}[dev]
+ pip install git+https://github.com/openai/whisper.git
 
  - name: Run
  run: pytest ${{ env.PYTEST_PARAMS }} -m "unit" ${{ env.PACKAGE }}
 
- # - uses: act10ns/slack@v1
- # with:
- # status: ${{ job.status }}
- # channel: '#haystack'
- # if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'
-
  integration-tests:
  name: Integration / ${{ matrix.os }}
  needs:
@@ -100,7 +141,7 @@ jobs:
  strategy:
  fail-fast: false
  matrix:
- os: [ubuntu-latest, macos-latest] # windows-latest --> No audio deps available for Windows.
+ os: [ubuntu-latest] # windows-latest --> No audio deps available for Windows.
  runs-on: ${{ matrix.os }}
 
  steps:
@@ -111,65 +152,18 @@ jobs:
  with:
  package: ${{ env.PACKAGE }}
 
- - name: Install audio libraries
+ - name: Install Linux audio libraries
  run: |
  sudo apt-get update
  sudo apt-get install libsndfile1 ffmpeg
 
  - name: Install Haystack
- run: pip install ${{ env.PACKAGE }}[dev]
+ run: |
+ pip install ${{ env.PACKAGE }}[dev]
+ pip install git+https://github.com/openai/whisper.git
 
  - name: Run tests
  env:
  TOKENIZERS_PARALLELISM: 'false' # Avoid logspam by tokenizers
  run: |
  pytest ${{ env.PYTEST_PARAMS }} -m "integration" ${{ env.PACKAGE }}
-
- # - uses: act10ns/slack@v1
- # with:
- # status: ${{ job.status }}
- # channel: '#haystack'
- # if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'
-
-
- yaml-schema:
- runs-on: ubuntu-latest
- steps:
-
- - uses: actions/checkout@v2
-
- - name: Setup Python
- uses: ./.github/actions/python_cache/
- with:
- package: ${{ env.PACKAGE }}
-
- - name: Install sndfile
- run: sudo apt update && sudo apt-get install libsndfile1 ffmpeg
-
- - name: Install package
- run: pip install -U ${{ env.PACKAGE }}[dev]
-
- - name: Update pipeline YAML schemas
- run: python .github/utils/generate_json_schema.py -o ${{ env.PACKAGE }}/json-schemas -v 0.0.1 -t "speech2text nodes schema" -d "schema for speech2text nodes" -p speech2text -m speech2text
-
- - name: Check status
- run: |
- if [[ `git status --porcelain` ]]; then
- git status
- echo "##################################################################################################"
- git diff
- echo "##################################################################################################"
- echo "# "
- echo "# CHECK FAILED! The YAML schemas for this package were not updated."
- echo "# "
- echo "# Please generate the new schemas locally:"
- echo "# "
- echo "# python .github/utils/generate_json_schema.py -o ${{ env.PACKAGE }}/json-schemas -v 0.0.1 -t "speech2text nodes schema" -d "schema for speech2text nodes" -p speech2text -m speech2text"
- echo "# "
- echo "# Or see https://github.com/deepset-ai/haystack/blob/main/CONTRIBUTING.md for help."
- echo "# "
- echo "# If you have further problems, please open an issue: https://github.com/deepset-ai/haystack-extras/issues"
- echo "# "
- echo "##################################################################################################"
- exit 1
- fi