1
1
#!/usr/bin/env python3
2
2
from argparse import ArgumentParser
3
+ from typing import Generator
3
4
import tempfile
4
5
import re
5
6
import urllib .request
23
24
24
25
CERT_WIKI = "https://wiki.sei.cmu.edu"
25
26
RULES_LIST_C = "/confluence/display/c/2+Rules"
27
+ RECOMMENDED_LIST_C = "/confluence/display/c/3+Recommendations"
26
28
RULES_LIST_CPP = "/confluence/display/cplusplus/2+Rules"
27
29
28
30
cache_path = script_path .parent / '.cache'
@@ -47,16 +49,22 @@ def soupify(url: str) -> BeautifulSoup:
47
49
48
50
return BeautifulSoup (content , 'html.parser' )
49
51
50
-
51
- def get_rules ():
52
- rules = []
53
- for soup in [soupify (f"{ CERT_WIKI } { RULES_LIST_C } " ), soupify (f"{ CERT_WIKI } { RULES_LIST_CPP } " )]:
52
def get_rule_listings() -> Generator[Tag, None, None]:
    """Yield the heading element that introduces each CERT listing page.

    The C rules page, the C++ rules page and the C recommendations page are
    fetched in that order through ``soupify``. Pages are fetched lazily, one
    per iteration step, so a consumer that stops early triggers no further
    requests.

    A page is skipped when ``soupify`` gives back ``None`` or when the
    expected ``<h1>`` heading is not present, so every yielded value is a
    real element that callers can navigate from.

    Yields:
        The ``<h1>`` element whose text is "Rule Listing" (rule pages) or
        "Recommendation Listing" (recommendations page).
    """
    # (wiki path, text of the <h1> that precedes the listing), in crawl order.
    listing_pages = [
        (RULES_LIST_C, "Rule Listing"),
        (RULES_LIST_CPP, "Rule Listing"),
        (RECOMMENDED_LIST_C, "Recommendation Listing"),
    ]

    for page_path, heading_text in listing_pages:
        soup = soupify(f"{CERT_WIKI}{page_path}")
        if soup is None:
            # Page could not be loaded; move on to the next listing.
            continue

        heading = soup.find("h1", string=heading_text)
        # find() returns None when nothing matches. Yielding that would break
        # the declared Tag-only contract and crash consumers that dereference
        # the heading, so a missing heading is skipped like a missing page.
        if heading is not None:
            yield heading
64
+
65
+ def get_rules ():
66
+ rules = []
67
+ for rule_listing_start in get_rule_listings ():
60
68
for link in rule_listing_start .next_element .next_element .find_all ('a' ):
61
69
if '-C' in link .string :
62
70
rule , title = map (str .strip , link .string .split ('.' , 1 ))
@@ -214,6 +222,8 @@ def helper(node):
214
222
# Fix a broken url present in many CERT-C pages
215
223
if node .name == 'a' and 'href' in node .attrs and node ['href' ] == "http://BB. Definitions#vulnerability" :
216
224
node ['href' ] = "https://wiki.sei.cmu.edu/confluence/display/c/BB.+Definitions#BB.Definitions-vulnerability"
225
+ elif node .name == 'a' and 'href' in node .attrs and node ['href' ] == "http://BB. Definitions#unexpected behavior" :
226
+ node ['href' ] = "https://wiki.sei.cmu.edu/confluence/display/c/BB.+Definitions#BB.Definitions-unexpectedbehavior"
217
227
# Turn relative URLs into absolute URLs
218
228
elif node .name == 'a' and 'href' in node .attrs and node ['href' ].startswith ("/confluence" ):
219
229
node ['href' ] = f"{ CERT_WIKI } { node ['href' ]} "
0 commit comments