Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New Legifrance (2020) layout #11

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
Previous commit
Next commit
law_proposals et law_projects au format Legifrance 2020
  • Loading branch information
Cimbali committed Apr 28, 2021
commit 053f6b7cb23d68ac64fae23d2bc7d0cfb7967d78
25 changes: 16 additions & 9 deletions legipy/parsers/pending_law_list_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,35 +10,42 @@

from legipy.common import cleanup_url
from legipy.common import merge_spaces
from legipy.common import LAW_KINDS
from legipy.models.law import Law


def parse_pending_law_list(url, html, **law_kwargs):
    """Parse a Legifrance (2020 layout) page listing pending laws.

    The parser looks at every ``<h2>`` whose text is a year, takes the next
    ``<ul>`` found after it, and builds one ``Law`` per ``<li><a>`` link in
    that list.

    :param url: URL the page was fetched from; link targets are resolved
        against it.
    :param html: Page content, decoded by BeautifulSoup as UTF-8.
    :param law_kwargs: Extra keyword arguments forwarded unchanged to every
        ``Law`` that is built (the service passes ``legislature`` this way).
    :return: List of ``Law`` objects, in page order.
    """
    soup = BeautifulSoup(html, 'html5lib', from_encoding='utf-8')

    # Compiled once instead of on every link. Group 1 is "Projet" or
    # "Proposition"; group 2 is the optional kind taken from LAW_KINDS.
    type_re = re.compile(
        r'(Projet|Proposition)\s+de\s+loi\s+({})?'.format('|'.join(LAW_KINDS))
    )
    # NOR identifier given in parentheses at the very end of the link text.
    nor_re = re.compile(r'\(([A-Z0-9]+)\)$')

    results = []

    for year_header in soup.find_all('h2'):
        try:
            year = int(year_header.get_text().strip())
        except ValueError:
            # Heading that is not a year: skip it rather than abort the
            # whole page or attach an unrelated list to it.
            continue

        ul = year_header.find_next('ul')
        if not ul:
            continue

        for law_entry in ul.select('li a'):
            link_text = law_entry.get_text().strip()
            nor_num = nor_re.search(link_text)
            type_loi = type_re.match(link_text)

            url_legi = cleanup_url(urljoin(url, law_entry['href']))
            # The document id is the last path segment of the link target.
            id_legi = urlparse(url_legi).path.strip('/').split('/')[-1]

            results.append(Law(
                year=year,
                id_legi=id_legi,
                # "projet" -> "proj", "proposition" -> "prop"; None when the
                # title does not start with the expected wording.
                type=type_loi.group(1).lower()[:4] if type_loi else None,
                # NOTE(review): the kind is the second capture group; the
                # first one only holds "Projet"/"Proposition".
                kind=type_loi.group(2) if type_loi else None,
                title=merge_spaces(link_text),
                nor=nor_num.group(1) if nor_num else None,
                url_legi=url_legi,
                **law_kwargs
            ))

    return results
12 changes: 5 additions & 7 deletions legipy/services/law_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,15 @@
class LawService(object):
pub_url = new_page_url('liste/dossierslegislatifs/{legislature}/')
law_url = new_page_url('dossierlegislatif/{id_legi}/')
pend_url = servlet_url('affichLoiPreparation')
comm_url = servlet_url('affichSarde')

def pending_laws(self, legislature, government=True):
    """Fetch and parse the list of pending laws for a legislature.

    :param legislature: Legislature identifier, substituted into
        ``pub_url`` and forwarded to the parser so each ``Law`` carries it.
    :param government: When true, request the ``PROJET_LOI`` list;
        otherwise request the ``PROPOSITION_LOI`` list.
    :return: Whatever ``parse_pending_law_list`` returns for the page.
    """
    law_type = 'PROJET_LOI' if government else 'PROPOSITION_LOI'
    response = requests.get(
        self.pub_url.format(legislature=legislature),
        params={'type': law_type}
    )
    # response.url is the final URL, used to resolve the links in the page.
    return parse_pending_law_list(response.url, response.content,
                                  legislature=legislature)

def published_laws(self, legislature):
response = requests.get(
Expand All @@ -49,6 +46,7 @@ def common_laws(self):
'page': page
}
)
print(response.url)
laws = parse_common_law_list(response.url, response.content)
common_laws += laws
page += 1
Expand Down