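[ChromeCart][Coupons] Support cart extraction for merchants (1)

Broaden the cart-item heuristics in cart-product-extraction.js:
- Allow an extra word between (cart|basket|bundle) and (item|product)
  in the cart-item HTML pattern, and treat "qty" text as a cart-item
  signal.
- Skip elements whose text contains "estimated (sales) tax", and skip
  "mini-cart-product" elements on ashleyfurniture.com and gnc.com.
- Exempt orientaltrading.com from the "add to cart" exclusion.
- Also search td and li elements when looking for prices.
- Return early from extractProductId when idExtractionMap is not
  declared.
- Replace matchNonCartPattern with matchPattern, which matches against
  either textContent or outerHTML.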

Bug: 1246540
Change-Id: I571c07d1ef840672e8c80f1ed075fc902bc81773
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3141783
Reviewed-by: Wei-Yin Chen (陳威尹) <[email protected]>
Commit-Queue: Yue Zhang <[email protected]>
Cr-Commit-Position: refs/heads/main@{#919642}
diff --git a/chrome/renderer/resources/cart/cart-product-extraction.js b/chrome/renderer/resources/cart/cart-product-extraction.js
index 1cd8868..3f11963 100644
--- a/chrome/renderer/resources/cart/cart-product-extraction.js
+++ b/chrome/renderer/resources/cart/cart-product-extraction.js
@@ -17,12 +17,14 @@
 var priceCleanupRegex = new RegExp(
     '^((' + priceCleanupPrefix + ')\\s+)|' + priceCleanupPostfix + '$', 'i');
 var cartItemHTMLRegex = new RegExp(
-    '(cart|basket|bundle)[-_]?(item|product)', 'i');
-var cartItemTextContentRegex = new RegExp(
-    'remove|delete|save for later|move to (favo(u?)rite|list|wish( ?)list)s?',
-    'i');
-var moveToCartRegex = new RegExp('move to (cart|bag)', 'i');
-var addToCartRegex = new RegExp('add to cart', 'i');
+    '(cart|basket|bundle)[-_]?((\\w+)[-_])?(item|product)', 'i');
+var cartItemTextRegex = new RegExp(
+    '(remove|delete|save for later|move to (favo(u?)rite|list|wish( ?)list)s?)'+
+    '|(qty)', 'i');
+var moveToCartTextRegex = new RegExp('move to (cart|bag)', 'i');
+var addToCartTextRegex = new RegExp('add to cart', 'i');
+var cartPriceTextRegex = new RegExp('estimated (sales )?tax', 'i');
+var minicartHTMLRegex = new RegExp('mini-cart-product', 'i');
 var productIdHTMLRegex = new RegExp('<a href="#modal-(\\w+)', 'i');
 var productIdURLRegex = new RegExp(
     '((\\w+)-\\d+-medium)|(images.cymax.com/Images/\\d+/(\\w+)-)', 'i');
@@ -491,7 +493,7 @@
   }
   // Generic heuristic to search for price elements.
   let captured_prices = [];
-  for (const price of item.querySelectorAll('span, b, p, div, h3')) {
+  for (const price of item.querySelectorAll('span, b, p, div, h3, td, li')) {
     const candidate = price.innerText.trim();
     if (!candidate.match(priceRegexFull))
       continue;
@@ -545,7 +547,8 @@
 
 function extractProductId(url, imageUrl, item) {
   const hostname = window.location.hostname;
-  if (idExtractionMap === undefined) {
+  if (typeof idExtractionMap === 'undefined' ||
+      idExtractionMap === undefined) {
     return null;
   }
   const source_map = {"product_url": url,
@@ -631,24 +634,28 @@
   return false;
 }
 
-function matchNonCartPattern(item, pattern) {
-  if (item.parentElement) {
-    // Walmart has 'move to cart' outside of the div.cart-item.
-    if (item.parentElement.textContent.toLowerCase().match(pattern))
-      return true;
-  }
-  return item.textContent.toLowerCase().match(pattern);
+function matchPattern(item, pattern, matchText) {
+  if (item === null) return false;
+  const textToMatch = matchText ? item.textContent : item.outerHTML;
+  return textToMatch.toLowerCase().match(pattern);
 }
 
 function isCartItem(item) {
   // TODO: Improve the heuristic here to accommodate more formats of cart item.
-  if (matchNonCartPattern(item, moveToCartRegex)) return false;
+  if (matchPattern(item, moveToCartTextRegex, true)) return false;
+  // Walmart has 'move to cart' outside of the div.cart-item.
+  if (matchPattern(item.parentElement, moveToCartTextRegex, true)) return false;
+  if (matchPattern(item, cartPriceTextRegex, true)) return false;
   // Item element in bestbuy.com contains "add to cart" for things
   // like protection plans.
   if (!document.URL.includes("bestbuy.com")
-    && matchNonCartPattern(item, addToCartRegex)) return false;
-  return item.textContent.toLowerCase().match(cartItemTextContentRegex) ||
-      item.outerHTML.toLowerCase().match(cartItemHTMLRegex);
+      && !document.URL.includes("orientaltrading.com")
+      && matchPattern(item, addToCartTextRegex, true)) return false;
+  if ((document.URL.includes("ashleyfurniture.com")
+      || document.URL.includes("gnc.com"))
+      && matchPattern(item, minicartHTMLRegex, false)) return false;
+  return matchPattern(item, cartItemTextRegex, true) ||
+    matchPattern(item, cartItemHTMLRegex, false);
 }
 
 function extractOneItem(item, extracted_items, processed, output,