# Python regular expressions to clean the RMP data.
import re
# Sample fragment of the scraped rule-table HTML used to exercise the patterns
# below. NOTE: the backslash at the end of the first <pre> line is a line
# continuation inside the string literal, so the text actually reads
# `...n_claim_count")!=empty &&`.
strtest = """ 3602433631519" /> </td>
<td> 7 </td>
<td>< = "> </a></td>
<td>HRB_HighClaim_Sideline</td>
<!-- Align Rule condition to variable expression -->
<td></td>
<td>MVEL</td>
<td>
<pre class="code">get("$BFS.hrb_claims_by_customer_us.n_claim_count")!\
=empty &&
get("$var_001")!=empty &&
get("$var_002")!=empty &&
$var_001>0 &&
$var_002< 3650</pre>
</td>
<td>
<td>
<table>
</tr>
<tr>
<td> <a href=
</tr>
</table>
</td>tail">
<td>
</td>
<td> 8 </td>
<td><a href=" 05</a></td>
<td>fortress_test_bf_continue_mo</td>
<td>
<table>
<tr>
<td> <a href=" " ><i class="fa fa-bell"></i></a> </td>
</tr>
<tr>
<td> <a href="#" </td>
</tr>
</table>
</td>
<td>
<a class="btn btn-link" href="/ru
</div>
</td>
</tr>
<td> 999 </td>
<td><a href="/rule/show</td>
<td>fraudAmtRatioInHL30ForASIN</td>
"""

# 1. Find the numeric cells such as <td> 7 </td>, <td> 8 </td>, <td> 999 </td>
#    and split the page on them, giving one text segment per table row.
pattern = re.compile(r"<td> [0-9]+ </td>")
# Keep the matched cells instead of discarding the result.
row_markers = pattern.findall(strtest)
str_split = pattern.split(strtest)

# 2. Inside each segment, capture whatever sits between
#    <pre class="code"> and </pre>.
pattern2 = re.compile(r'<pre class=\"code\">(.*?)</pre>')
res = []
for segment in str_split:
    # '.' does not match newlines by default, so flatten the segment first;
    # this also removes the line breaks from the captured condition text.
    flattened = segment.replace('\n', '').replace('\t', '')
    res.extend(pattern2.findall(flattened))

# 3. Remove the `get(...)!=empty` checks. If they are not removed, each
#    variable is counted twice (once in the check, once in the comparison).
pattern3 = re.compile(r'get(.*?)!=empty')
# Clean every extracted condition (not only res[0]) so that an input with no
# <pre class="code"> block yields an empty list instead of an IndexError.
# NOTE(review): the original final step was `.replace('&', '&')`, a no-op;
# presumably it was meant to decode the HTML entity `&amp;` -- confirm against
# the raw (un-decoded) page source.
cleaned_conditions = [
    pattern3.sub('', condition).replace('&amp;', '&')
    for condition in res
]