refactor(ai): improve bouquet name balancing and normalization

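- Group names by word count (2, 3, or 4 words) for balanced distribution; names with any other word count are discarded
- Remove duplicates within each word-count group
- Interleave the groups round-robin (one 2-word, one 3-word, one 4-word name per pass) so that each length is equally represented
- Update normalization to capitalize the first word and lowercase all remaining words (proper nouns are no longer preserved)
- Replace simple deduplication with structured balancing logic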
2026-01-23 17:44:02 +03:00
parent 4a624d5fef
commit 3aac83474b
1 changed files with 33 additions and 13 deletions
--- a/myproject/products/services/ai/bouquet_names.py
+++ b/myproject/products/services/ai/bouquet_names.py
@@ -133,21 +133,42 @@ class BouquetNameGenerator(BaseAIProductService):
                normalized_line = self._normalize_case(line)
                names.append(normalized_line)

-        # Удаляем дубликаты
-        unique_names = []
-        seen = set()
+        # Фильтруем и сортируем названия по длине для равномерного распределения
+        names_by_length = {2: [], 3: [], 4: []}
+
        for name in names:
+            word_count = len(name.split())
+            if word_count in names_by_length:
+                names_by_length[word_count].append(name)
+
+        # Удаляем дубликаты в каждой группе
+        for length in names_by_length:
+            unique_list = []
+            seen = set()
+            for name in names_by_length[length]:
                if name not in seen:
                    seen.add(name)
-                unique_names.append(name)
+                    unique_list.append(name)
+            names_by_length[length] = unique_list

-        return unique_names
+        # Объединяем названия в один список в пропорциях 2:3:4
+        balanced_names = []
+
+        # Определяем максимальное количество названий одного типа
+        max_per_length = max(len(names_list) for names_list in names_by_length.values()) if any(names_by_length.values()) else 0
+
+        # Добавляем названия по одному из каждой категории по очереди
+        for i in range(max_per_length):
+            for length in [2, 3, 4]:  # Проходим по длине 2, 3, 4
+                if i < len(names_by_length[length]):
+                    balanced_names.append(names_by_length[length][i])
+
+        return balanced_names

    def _normalize_case(self, text: str) -> str:
        """
        Приводит текст к формату: первое слово с заглавной буквы, остальные строчные
        Например: "романтический БУКЕТ роз" -> "Романтический букет роз"
-        Но сохраняет имена собственные: "Букет Van Gogh" -> "Букет Van Gogh"
        """
        if not text:
            return text
@@ -158,12 +179,11 @@ class BouquetNameGenerator(BaseAIProductService):
        if not words:
            return text

-        # Первое слово с заглавной буквы, остальные как есть (сохраняем имена собственные)
-        first_word = words[0].capitalize()
-        remaining_words = words[1:]
+        # Первое слово с заглавной буквы, остальные строчные
+        normalized_words = [words[0].capitalize()] + [word.lower() for word in words[1:]]

        # Собираем обратно в строку
-        return ' '.join([first_word] + remaining_words)
+        return ' '.join(normalized_words)

    def generate_and_store(
        self,