There was an interesting idea posed in the forum to build a tool to check the precision of language used in requirements specs. Different levels of “loose” wording were flagged using if/then rules. I tried to do something similar using an NLP-based approach, since it would be easier to extend into more advanced territory later than maintaining procedural rules. Here’s my solution:
# My solution to this: https://discuss.streamlit.io/t/the-if-else-appear-anyway/24713/7
import streamlit as st
import spacy
from spacy import displacy
from spacy.matcher import PhraseMatcher
# Load the small English pipeline; en_core_web_md / en_core_web_lg are drop-in alternatives.
nlp = spacy.load('en_core_web_sm')
nlp.disable_pipes('ner')  # named-entity recognition is not needed for phrase matching

# Imprecise-language term lists, one entry per match label.
TERM_LISTS = {
    'WEAK': ['fast', 'quickly', 'easy', 'timely', 'before', 'after',
             'user-friendly', 'effective', 'multiple', 'as possible',
             'appropriate', 'normal', 'capability', 'reliable',
             'state-of-the-art', 'effortless', 'multi'],
    'UNBOUNDED': ['at least', 'more than', 'less than', 'not less than',
                  'no less than', 'at the minimum', 'always'],
    'AMBIGUOUS': ['should', 'may', 'if possible', 'when', 'when appropriate',
                  'detail', 'details', 'analyse', 'respond', 'verified'],
    'AMBIGUOUS_WEAK': ['support', 'relevant information', 'needed information'],
}
ENTS = list(TERM_LISTS)  # ['WEAK', 'UNBOUNDED', 'AMBIGUOUS', 'AMBIGUOUS_WEAK']
ENT_COLORS = {'WEAK': 'pink', 'UNBOUNDED': 'lightgrey', 'AMBIGUOUS': 'lime', 'AMBIGUOUS_WEAK': 'skyblue'}

# Register one PhraseMatcher rule set per label.
matcher = PhraseMatcher(nlp.vocab)
for label, terms in TERM_LISTS.items():
    matcher.add(label, [nlp.make_doc(term) for term in terms])

# --- Streamlit UI ---
st.header('Requirements check')
st.write('Simple app using **spaCy** rule-based matcher. See [spaCY docs](https://spacy.io/usage/rule-based-matching).')
st.write('---')
text = st.text_area('🔤 Enter your requirement statement here', placeholder='at a minimum the software must load quickly using state-of-the-art coding practices and if possible present relevant information always')
if text != '':
    # Run the pipeline and the phrase matcher over the entered requirement.
    doc = nlp(text)
    matches = matcher(doc)

    # Build displacy "manual" entity dicts. Two fixes over the naive approach:
    #   1. Offsets must be relative to the full rendered text, NOT the containing
    #      sentence — sentence-relative offsets mis-highlight multi-sentence input.
    #   2. Overlapping matches (e.g. 'less than' inside 'not less than') corrupt
    #      the rendering, so keep only the longest match at each position.
    matched_ents = []  # entity dicts for displacy, sorted, non-overlapping
    last_end = -1      # token index where the last kept match ended
    for match_id, start, end in sorted(matches, key=lambda m: (m[1], -m[2])):
        if start < last_end:
            continue  # shorter/nested match overlapping one already kept
        matched_span = doc[start:end]
        matched_ents.append({
            'start': matched_span.start_char,
            'end': matched_span.end_char,
            'label': nlp.vocab.strings[match_id],
        })
        last_end = end

    if matched_ents:
        # Render the highlighted text with displacy in manual mode.
        html = displacy.render(
            {'text': text, 'ents': matched_ents},
            style='ent',
            options={'ents': ENTS, 'colors': ENT_COLORS},
            manual=True
        )
        # Newlines seem to mess with the rendering
        html = html.replace('\n', ' ')
        html_wrapper = f"""<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{html}</div>"""
        st.warning('🔴 Issue found in requirement')
        st.write(html_wrapper, unsafe_allow_html=True)
    else:
        st.info('🟢 No issues in requirement')
You’ll need to install spaCy and at least the small English language model. This is very well documented on the spacy.io site.
pip install -U spacy
python -m spacy download en_core_web_sm
Let me know what you think!
Arvindra