Skip to content

Commit ed70a09

Browse files
Update cert-help-extraction.py to support CERT-C optional (recommendation) rules
1 parent 35cfb0f commit ed70a09

File tree

1 file changed

+17
-7
lines changed

1 file changed

+17
-7
lines changed

Diff for: scripts/help/cert-help-extraction.py

+17-7
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#!/usr/bin/env python3
22
from argparse import ArgumentParser
3+
from typing import Generator
34
import tempfile
45
import re
56
import urllib.request
@@ -23,6 +24,7 @@
2324

2425
CERT_WIKI = "https://wiki.sei.cmu.edu"
2526
RULES_LIST_C = "/confluence/display/c/2+Rules"
27+
RECOMMENDED_LIST_C = "/confluence/display/c/3+Recommendations"
2628
RULES_LIST_CPP = "/confluence/display/cplusplus/2+Rules"
2729

2830
cache_path = script_path.parent / '.cache'
@@ -47,16 +49,22 @@ def soupify(url: str) -> BeautifulSoup:
4749

4850
return BeautifulSoup(content, 'html.parser')
4951

50-
51-
def get_rules():
52-
rules = []
53-
for soup in [soupify(f"{CERT_WIKI}{RULES_LIST_C}"), soupify(f"{CERT_WIKI}{RULES_LIST_CPP}")]:
52+
def get_rule_listings() -> Generator[Tag, None, None]:
53+
for rule_list_id in [RULES_LIST_C, RULES_LIST_CPP]:
54+
soup = soupify(f"{CERT_WIKI}{rule_list_id}")
5455
if soup == None:
55-
return None
56-
57-
rule_listing_start = soup.find(
56+
continue
57+
58+
yield soup.find(
5859
"h1", string="Rule Listing")
5960

61+
soup = soupify(f"{CERT_WIKI}{RECOMMENDED_LIST_C}")
62+
if soup != None:
63+
yield soup.find("h1", string="Recommendation Listing")
64+
65+
def get_rules():
66+
rules = []
67+
for rule_listing_start in get_rule_listings():
6068
for link in rule_listing_start.next_element.next_element.find_all('a'):
6169
if '-C' in link.string:
6270
rule, title = map(str.strip, link.string.split('.', 1))
@@ -214,6 +222,8 @@ def helper(node):
214222
# Fix a broken url present in many CERT-C pages
215223
if node.name == 'a' and 'href' in node.attrs and node['href'] == "http://BB. Definitions#vulnerability":
216224
node['href'] = "https://wiki.sei.cmu.edu/confluence/display/c/BB.+Definitions#BB.Definitions-vulnerability"
225+
elif node.name == 'a' and 'href' in node.attrs and node['href'] == "http://BB. Definitions#unexpected behavior":
226+
node['href'] = "https://wiki.sei.cmu.edu/confluence/display/c/BB.+Definitions#BB.Definitions-unexpectedbehavior"
217227
# Turn relative URLs into absolute URLs
218228
elif node.name == 'a' and 'href' in node.attrs and node['href'].startswith("/confluence"):
219229
node['href'] = f"{CERT_WIKI}{node['href']}"

0 commit comments

Comments
 (0)