%\documentstyle{article}
\documentclass{article}
\usepackage{cmap}
\unitlength = 1mm
\textwidth=17.5cm
\textheight=26.5cm
\voffset=-3truecm
\oddsidemargin=-9.6mm
\usepackage{latexsym,amsxtra,amscd,ifthen}
\usepackage{amsmath,amsfonts,amssymb,amsthm}
\usepackage{amsfonts}
\usepackage{verbatim}
%\renewcommand{\refname}{Список литературы}
%\renewcommand{\contentsname}{Содержание}
\theoremstyle{plain}
\newtheorem{problem}{Problem}
\theoremstyle{definition}
\newtheorem{definition}{Definition}
%\newtheorem{theorema}{Теорема}
%\def\proof{{\bf Доказательство.}}
\def\d{\displaystyle}
\def\o{\overline}
\author{Dmitri Piontkovski, Maxim Prasolov, Grigory Rybnikov}
\title{How to count words?}
\date{}
\begin{document}
\maketitle
\section {Main problem}
\begin{problem}
\label{1}
The language of Winnie-Pooh tribe has 100 words. All possible combinations of
these words, in any order, are used as sentences of the language. The are two
magic spells, ``Earth stands on Great Crocodile'' and ``Every evening Crocodile
swallows Sun'', that cause tornado. That is why it is not allowed to pronounce
sentences that contain the above sequences of words\footnote{Even if the words
are in other forms}. How many sentences of 20 words in this language are
allowed?
\end{problem}
\begin{problem}
\label{2} A computer uses $256$ commands. There is a sequence of
four commands that breaks the computer. The programmers made all possible
programs of\/ $7$ commands. Find the percentage of the
programs that do not break the computer.%
\footnote{Similar story happened in 1990s with the first version
of
\emph{Pentium} microprocessor.% (``Pentium F00F bug''.)
}
\end{problem}
\begin{problem}[Main Problem]
\label{0} \label{main_p}
The alphabet of a language $L$ consists of $N$ letters. Several words $v_1,
\dots, v_k$ are called\/ \emph{forbidden} and are not used in the language. A
word (that is, a finite sequence of letters) is called\/ \emph{admissible} if
no part of it is a forbidden word. Find the number of admissible words of $n$
letters in $L$.
\end{problem}
\begin{problem}
Show that the Problems~\ref{1} and~\ref{2} are special cases of
Problem~\ref{0}.
\end{problem}
%Все остальные задачи этой темы тоже будут сводиться к Главной задаче.
\section{How to write down the answer?}
Choose an alphabet $A$ of $N$ letters (for example, if $A = (a,b,c,\dots,z)$,
then $N=26$). By a \emph{word} we will mean an arbitrary finite sequence of
letters of the alphabet $A$. A part of a word is called its \emph{subword}.
We assume that every language $L$ has exactly one word of zero
length, that is, an \emph{empty} word. %Traditionally, it is
%denoted by $\Lambda$.
%The empty word is always admissible. That
%is, we always assume that the admissible words are not empty.
We assume that distinct forbidden words are not subwords of each other. We also
assume that each forbidden word has at least two letters, that is, the empty
word and one-letter words are admissible. Recall that the set of forbidden
words is finite.
\begin{problem}
\label{free} The\/ \emph{free language} $F_A$ over the alphabet $A$ is the
language with no forbidden words. Prove that the number of the words of $n$
letters in this language is equal to $N^n$.
\end{problem}
\begin{problem}
\label{b}
Let $B$ be the language whose forbidden words are all two-letter words with
different letters. Prove that the number of admissible words of $n$ letters in
the language $B$ is equal to $N$ for any positive integer $n$.
\end{problem}
Let $M$ be an arbitrary set of words. Let us denote by $m_n$ the number of
$n$-letter words in this set. The infinite sum
$$
M (x) = m_0 + m_1 x + m_2 x^2 + m_3 x^3 + \dots
$$
is called the\/ \emph{dimension series} of the set $M$. The infinite sums of
such type (with arbitrary numbers as coefficients $m_n$) will be briefly
referred to as\/ \emph{series} (their complete name, which will\/ \emph{not} be
used here, is\/ \emph{formal power series}).
For any language $L$, by its dimension series $L(x)$ we will mean the dimension
series of the set of admissible words. For example, for the free language $F_A$
its dimension series is the geometric series $F_A(x) = 1 + N x + N^2 x^2 + N^3
x^3 + \dots $, and for the language $B$ above we have $B (x) = 1 + Nx + Nx^2 +
Nx^3 + \dots$
%Хотя в ряде размеров стоит знак "$+$",
%складывать его члены нельзя: это просто
%способ записи ответа к Главной задаче сразу для всех возможных $n$.
\begin{problem}
\label{3} Write down the dimension series for the language over the
alphabet $\{a,b\}$ with forbidden words $aa$ and~$bb$.
\end{problem}
\section{The arithmetics of languages}
If a set $M$ contains finitely many words, then its dimension series is a
polynomial in the variable $x$. For infinite sets, their dimension series are
infinite as well, but they allow various arithmetic operations similar to the
operations over the polynomials, that is, addition, subtraction, multiplication
by each other and by numbers, and even sometimes division.
In the definitions and problems of this section, $S = s_0 + s_1 x + s_2 x^2 +
\dots$ and $R = r_0 + r_1 x + r_2 x^2 + \dots$ are two series, and $L_1$ and
$L_2$ are two languages over alphabets $A_1$ and $A_2$ without common letters.
We will assume that the alphabet $A_1$ consists of upper-case letters while the
alphabet $A_2$ consists of lower-case ones. Let the alphabet $A$ be the union
of the alphabets $A_1$ and $A_2$, that is, $A$ contains both upper-case and
lower-case letters.
\begin{definition}
a) The\/ \emph{sum} of two series $R$ and $S$ is the series
$$
R + S = (s_0 + r_0) +(s_1 + r_1)x +(s_2 + r_2)x^2 + \dots
$$
b) The\/ \emph{sum} of two languages $L_1$ and $L_2$ is the language $L_1 +
L_2$ over $A$ whose set of admissible words is the union of the sets of
admissible words of the languages $L_1$ and $L_2$.
\end{definition}
%admissible words are the admissible words of $L_1$ and $L_2$.
\begin{problem}
Define the language $L_1+L_2$ by a finite set of forbidden words.
\end{problem}
\begin{problem}
Prove that if $L=L_1 + L_2$, then
$$
L (x) = L_1 (x) + L_2 (x) - 1.
$$
\end{problem}
The product of two series is defined by the same way as the product of two
polynomials.
\begin{definition}
\emph{The product of a series $R$ by a monomial $a x^n$} is the series
$$
R\cdot a x^n = a r_0 x^n + a r_1 x^{n+1} + a r_2 x^2 x^{n+2} + \dots
$$
\emph{The product of two series $R$ and $S$} is the sum
$$
R \cdot S = R\cdot s_0 + R\cdot s_1 x + R\cdot s_2 x^2 + \dots
$$
Note that this infinite sum of series is well-defined because the coefficient
of every power of $x$ is a finite sum of numbers.
\end{definition}
\begin{problem}
\label{progr} Prove that
$$
(1-x) \cdot (1+ x+ x^2 + \dots) = 1.
$$
\end{problem}
\begin{definition}
\emph{The product of two sets of words} $M$ and $ N$ is the set $MN$ of all
words of the form $mn$, where $m$ is a word in $M$ and $n$ is a word in $N$.
\emph{The product of two languages} $L_1$ and $L_2$ is the language $L_1 \cdot
L_2$ over $A$ whose set of admissible words is the product of the sets of
admissible words of the languages $L_1$ and $L_2$.
\end{definition}
%whose forbidden words are the forbidden words of the languages $L_1$ and $L_2$
%and all two-letter words with the first letter in lower case and the second
%letter in upper case.
\begin{problem}
Define the language $L_1\cdot L_2$ by a finite set of forbidden words.
\end{problem}
\begin{problem}
%
%а) Let $M$ and $ N$ be two sets of words. Prove the formula
%$$
% M\cdot N (x) = M(x) \cdot N(x).
%$$
%
%
%
%Let $L = L_1 \cdot L_2$.
%
%
%b) Prove that all all admissible words of the language $L$ have
%the form $Ww$, where $W$ is an admissible word of $L_1$ ($W$
%consists of upper-case letters) and $w$ is an admissible word of
%$L_2$ ($w$ consists of lower-case letters).
%
%c)
Prove that
$$
L (x) = L_1 (x) \cdot L_2 (x).
$$
\end{problem}
The division of series has no version for languages, but it helps to write down
their dimension series in a compact form. It is defined by a formula similar to
the formula for the sum of an infinite geometric progression.
\begin{definition}
Suppose that a series $S$ begins with the unit, that is, $s_0 =
1$, and $S = 1 + \overline S$,
where $\overline S = s_1 x + s_2 x^2 + \dots$
Then its\/ \emph{inverse} is the series
$$
\frac{\d 1}{\d S} = 1 - \overline S +\overline S^2 -\overline S^3 +\dots
$$
The\/ \emph{quotient} of two series $R$ and $S$ is the series
$$
\frac{\d R}{\d S} = R -R\cdot \overline S +
R\cdot \overline S^2 -R\cdot \overline S^3 +\dots
$$
\end{definition}
In general, the quotient of two dimension series can not be obtained as the
dimension series for a language. For example, some of the coefficients of the
quotient can be negative.
\begin{problem}
a) Prove that
$$
S \cdot \frac{\d R}{\d S} = R.
$$
b) Prove that if $S \cdot T = R$, where the series $S$ begins with the unit,
then $T = \frac{\d R}{\d S} $.
\end{problem}
The use of the division of two series is that it helps to represent many
infinite series by a finite formula, that is, a quotient of two polynomials.
\begin{problem}
a) Prove that
$$
F_A(x) = \frac{\d 1}{\d 1 - Nx}.
$$
b) Represent the dimension series from Problems~\ref{b}
and~\ref{3} as a quotient of two polynomials.
\end{problem}
\begin{problem}%\footnote{No elementary solution for this problem is known to the Jury}
\label{grob1}
Prove that the dimension series of any language can be represented as a
quotient of two polynomials.
\end{problem}
Thus, the answer to our Main Problem
%(Problem~\ref{main_p})
should be represented as a quotient of two polynomials.
\section{Free word}
\begin{problem}
Let $L$ be a language over the Latin alphabet with only one forbidden word
``mouse''. Find $L(x)$.
\end{problem}
\begin{definition}
Let $a$ and $b$ be words such that no one is a subword of each other. A
nonempty word $c$ is called an\/ \emph{overlap} of $a$ and $b$ if it is a
beginning subword of $a$ and, in the same time, the final subword of $b$ (for
example, the word ``all'' is an overlap of the words ``ball'' and ``allow'').
A word $w$ is called\/ \emph{free} if it has no overlaps with itself except for
the whole word $w$ (e.g., the word ``free'' is free but the word
``underground'' is not).
\end{definition}
\begin{problem}
Suppose that in a language $L$ over an alphabet $A$ of $N$ letters there is a
single forbidden word, which is free and consists of $m$ letters. Prove that
$$
L(x) = \frac{1}{1-Nx+ x^m}.
$$
\end{problem}
\begin{problem}
Solve Problem~\ref{2} under the addition assumption that the
sequence of commands breaking the computer is a free word.
\end{problem}
\section {Transformations of words}
\begin{definition}
Let $M$ and $M'$ be two sets of words. Let us divide the set $M$ in two parts
$K$ and $L$. A function $f$ mapping $L$ to a subset $I$ of $M'$ is called a
{\it transformation} of the set $M$ to the set $M'$ if $f$ preserves lengths of
words and is a one-to-one map of $L$ onto $I$.
In this case, the set $K$ is called the\/ \emph{kernel} of the transformation
$f$ and the set $I$ is called the\/ \emph{image} of $f$.
\end{definition}
A transformation will be denoted by an arrow: $M \Longrightarrow M'$.
\begin{definition}
A sequence of transformations
$$
M_1 \Longrightarrow M_2 \Longrightarrow \dots \Longrightarrow M_n
$$
is called\/ \emph{exact} if the kernel of each subsequent transformation
coincides with the image of the previous one.
\end{definition}
\begin{problem}
Let $L$ be a language over an alphabet $A$, let $G$ be the set of admissible
words, and let $N$ be the set of all non-admissible words. Construct an exact
sequence of transformations
$$
\emptyset \Longrightarrow N \Longrightarrow F_A \Longrightarrow G \Longrightarrow \emptyset,
$$
where $F_A$ is the set of admissible words of the free language,
that is, the set of all words over the alphabet $A$, and
$\emptyset$ denotes the empty set.
\end{problem}
\begin{problem}
10 boys and 10 girls are sitting in a line so that boys' neighbors are girls
and vice versa; their teacher is sitting next to them. Each of the children has
some bonbons, and the total number of the boys' bonbons is equal to the total
number of the girls' ones. The first boy gives all his bonbons to the girl
sitting next to him. The girl eats all these bonbons, then she eats the same
number of her own bonbons, and then she gives the rest of her bonbons to the
next boy. He does the same (eats and gives the rest of bonbons to the next
girl), then the next girl does the same, and so on. The last girl gives the
rest of her bonbons to the teacher. How many bonbons does the teacher get?
\end{problem}
\begin{problem}
Let $$ \emptyset \Longrightarrow M_1 \Longrightarrow M_2
\Longrightarrow \dots \Longrightarrow M_n \Longrightarrow
\emptyset
$$
be an exact sequence of transformations.
а) Prove that if each set $M_i$ consists of a finite number $m_i$ of words,
then
$$
m_1 + m_3 +m_5 + \dots = m_2 + m_4 + \dots
$$
b) Prove the following formula for the dimension series:
$$
M_1(x) + M_3(x) +M_5 (x) + \dots = M_2(x) + M_4(x) + \dots
$$
\end{problem}
\begin{definition}
A set $M$ of words is called\/ \emph{free} if no word in $M$ is a subword of
another word in $M$, all words in $M$ are free, and the words in $M$ have no
overlaps with each other.
\end{definition}
\begin{problem}
Let $L$ be a language over an alphabet $A$, and let the set $B$ of forbidden
words of $L$ be free. Denote the set of all admissible words by $G$ and the set
of all nonempty admissible words by $\overline G$. Construct an exact sequence
of transformations
$$
\emptyset \Longrightarrow
B\cdot G \Longrightarrow A \cdot G \Longrightarrow \overline G
\Longrightarrow \emptyset.
$$
\end{problem}
\begin{problem}
Let $L$ be a language over an alphabet $A$ of $N$ letters, and let the set $B$
of forbidden words of $L$ be free. Prove the formula
$$
L (x) = \frac{\d 1}{\d 1 - N x + B(x)}.
$$
\end{problem}
\begin{problem}
Prove that the set of magic spells in Problem~\ref{1} is free, and solve the
problem.
\end{problem}
\begin{problem}
Find $L(x)$ provided that the alphabet of the language $L$ is Latin and the
forbidden words are\/ \emph{veni, vidi, vici.}
\end{problem}
\begin{definition}
Let $L$ be a language. A\/ \emph{simple linkage} is a word $v = str$, where
$s$, $t$, $r$ are nonempty words such that the words $g=st$ and $f=tr$ are
forbidden and there are no other forbidden subwords in $v$. The end $r$ of the
simple linkage (which is produced by cutting off the first forbidden subword
$g$) is called the\/ \emph{tail} of $v$.
\end{definition}
\begin{problem}
Prove that the set of forbidden words of a language is free if and only if
there are no simple linkages in it.
\end{problem}
\begin{problem}
\label{syz_2}
Let $L$ be a language over an alphabet $A$, let $B$ be its set of forbidden
words, and let $S$ be the set of all simple linkages. Denote the set of all
admissible words by~$G$ and the set of all nonempty admissible words by~$\o G$.
Construct an exact sequence of transformations
$$
%\emptyset \Longrightarrow
S\cdot G \Longrightarrow B\cdot G \Longrightarrow A \cdot G \Longrightarrow \overline G
\Longrightarrow \emptyset.
$$
\end{problem}
\begin{problem}
Find the conditions on the set of forbidden words of a language $L$ under which
the exact sequence from Problem~\ref{syz_2} could be extended to an exact
sequence
$$
\emptyset \Longrightarrow
S\cdot G \Longrightarrow B\cdot G \Longrightarrow A \cdot G \Longrightarrow \overline G
\Longrightarrow \emptyset
$$
(such languages are called\/ \emph{non-tangled}). Give a formula to express the
dimension series $L(z)$ of a non-tangled language in terms of the number $N$ of
letters and the dimension series of the sets $B$ and $S$.
\end{problem}
\begin{problem}
Find the dimension series of the language over the alphabet $\{a,b,c\}$ with
forbidden words $abb, bbc, bac$.
\end{problem}
\begin{problem}
Find the dimension series of the language over the alphabet $A = \{ x_1, \dots,
x_n , y_1, \dots, y_n,z_1, \dots , z_n \}$, if the forbidden words are the
words of the form $x_i y_j$ and $y_j z_k$, where $1\le i,j,k \le n$.
\end{problem}
\begin{problem}
Prove that if the set of forbidden words of a non-tangled language consists of
a single word, then this set is free.
\end{problem}
\section{Free sets revisited}
\begin{problem}
Construct an infinite free set over an alphabet of two letters.
\end{problem}
\begin{problem}
Suppose that the set of forbidden words $B$ of a language $L$ is
free and the alphabet has more than one letter. Prove that the set
of admissible words of the language is infinite.
\end{problem}
\begin{definition}
Let $S = s_0 + s_1 x + s_2 x^2 + \dots$ and $R = r_0 + r_1 x + r_2 x^2 + \dots$
be two series. If the inequality $s_k \ge r_k$ holds for any $k$, then we say
that the following inequality for the series holds:
$$
S \ge R.
$$
\end{definition}
\begin{problem}
Prove that if series $P$, $Q$, and $R$ satisfy the inequalities
$$
P \ge Q \mbox{ and } R\ge 0,
$$
then
$$
PR \ge QR.
$$
\end{problem}
\begin{problem}
Suppose that for every $d>0$ the sets $B$ and $B'$ of forbidden words of the
languages $L$ and $L'$ over the same alphabet $A$ contain the same number of
words of length $d$, so that $B(z) = B'(z)$. Prove that if the set $B$ is free,
then the inequality
$$
L'(z) \ge L(z)
$$
holds; in addition, we have $L'(z) = L(z)$ if and only if the set $B'$ is also
free.
\end{problem}
\begin{problem}
Suppose that the alphabet consists of two letters and the set $B$ contains at
least two words, including a word $w$ of length 2.
a) Prove that the set $B$ is not free.
b) Is it possible that $B$ is free if $w$ is of length 3?
\end{problem}
\begin{problem}
Suppose that an alphabet consists of $n $ letters and $B$ consists of $g$
two-letter words. Prove that if $g \le n^2/4$, then the set $B$ may be chosen
to be free.
\end{problem}
\begin{problem}
\label{n=kd}
Prove that if $n = k d $ and $m \le k^d (d-1)^{d-1}$, where the numbers
$d,k,m,n$ are positive integers, then, over an alphabet of $n$ letters, one can
choose a free set consisting of $m$ words of length $d$.
\end{problem}
\begin{problem}
а) Prove that, if $B$ is a free set over an alphabet of $n $ letters, then
there is the following inequality
$$\frac{1}{1-nx+B(x)} \ge 1.$$
b) Is the converse true, that is, is it true that if the inequality
$$\frac{1}{1-nx+p(x)}
\ge 1$$
holds for a positive integer $n$ and a polynomial $p(x) $ whose
coefficients are positive integers and whose constant term is
zero, then there exists a free set $B$ over an alphabet of $n$
letters, with $B(x) = p(x)$?
\end{problem}
\begin{problem}%\footnote{No elementary solution for this problem is known to the Jury}
\label{grob2} Let $n$ be a positive integer and let $p(x)$ be a
polynomial with positive integer coefficients and zero constant
term. Prove that there exists a free set $B$ with dimension series
$B(x) = p(x)$ if and only if there exist two polynomials $f$ and
$g$ with nonnegative integer coefficients with $f(0) = g(0)=0$
such that
$$
(1-f)(1-g)\ge 1-nx+p(x).
$$
\end{problem}
\begin{problem}\footnote{Neither a solution nor even an answer to this problem are known to the Jury}
\label{grob3}
Find a condition describing possible dimension series of the sets of forbidden
words for non-tangled languages (like we described dimension series of free
sets in problem~\ref{grob2}).
\end{problem}
\section{Words and chains}
%Мы будем считать, что набор запретных слов в каждом языке задан
%экономно, то есть никакое forbidden слово не является подсловом
%другого запретного слова.
\begin{definition}
\label{def_chain}
Let $L$ be a language. \emph{Chains of length one} are the forbidden words,
and\/ \emph{chains of length 2} are the simple linkages. Next, one can define
the chains of length 3, 4 etc. Namely, a word $v = str$ (where all the words
$s$, $t$, $r$ are nonempty) is called a\/ \emph{chain of length $n$} if its
initial subword $g=st$ is a chain of length $n-1$, the final subword $f=tr$ is
a forbidden word, where $t$ is a subword of the tail $p$ of the chain $g$, and
there are no other forbidden subwords but $f$ in the final subword $pr$. The\/
\emph{tail} of the chain $v$ is the word $r$.
\end{definition}
A chain looks as follows (each arc denotes a forbidden subword in the chain):
\begin{picture}(90,15)
\put(12.5,1){\line(1,0){75}}
\put(20,1){\oval(15,10)[t]}
\put(30,1){\oval(15,10)[t]}
\put(40,1){\oval(15,10)[t]}
\put(50,1){\oval(15,10)[t]}
\put(60,1){\oval(15,10)[t]}
\put(70,1){\oval(15,10)[t]}
\put(80,1){\oval(15,10)[t]}
\put(67.5,2){\line(1,0){20}}
\end{picture}
The length of the chain is the number of arcs. The only overlaps are of
neighboring arcs (and the overlaps of neighboring arcs are non-empty). The
emphasized two final tails do not contain any forbidden subword but the last
arc.
For example, if the only forbidden word is $aba$, then the only chain of length
one is $aba$, the only chain of length two is $ababa$, the only one of length 3
is $abababa$, etc.
\begin{problem}
\label{tourn} Suppose that the forbidden words in a language $L$
are the words ``tournament'', ``of'', ``towns''. Write up all chains of length
$n$.
\end{problem}
\begin{problem}
\label{right_chains}
\emph{Antichains} of length $n$ are defined in the same way as chains of
length~$n$, with the only difference that we read words of $L$ in
Definition~\ref{def_chain} ``from right to left'', i.e., the tail of an
antichain is to the left, and the initial antichain of length~$n-1$ is to the
right. Prove that the sets of length~$n$ chains and length~$n$ antichains
coincide.
\end{problem}
\begin{problem}
\label{chains} Prove that a chain of length $n$
contains no other chain of length $n$ as a subword.
\end{problem}
\begin{problem}
Prove that if a word is decomposed as $w= g c$, where~$g$ is an admissible word
and~$c$ is a chain, then, if in addition the length of $c$ is greater than $1$,
$w$ has exactly two decompositions of this form, and the lengths of the chains
in these decompositions differ by $1$.
\end{problem}
The next problem gives a way to solve the Main Problem.
\begin{problem}
Let $L$ be a language over an alphabet $A$. Let $G$ be the set of its
admissible words and $\o G$ the set of all nonempty admissible words. Let $C_1$
be the set of chains of length one, $C_2$ the set of chains of length 2, and so
on.
Prove that
$$
L(x) = \frac{\d 1}{\d 1 - Nx + C_1(x) - C_2 (x) + C_3 (x) - \dots}
$$
\end{problem}
\begin{problem}
Find the dimension series for the language in Problem~\ref{tourn}.
\end{problem}
\begin{problem}
Find all possible answers to Problem~\ref{2} depending on the form of the breaking sequence.
\end{problem}
\begin{problem}
We say that a subword $c$ of a word $w$ is its\/ \emph{maximal subchain} if $w$
can be decomposed as $w= g c u$, where $g$ is an admissible word and $c$ is a
chain, and for any other decomposition $w= g c' u'$ with another chain $c'$ the
word $c'$ is always a subword of $c$. Prove that any non-admissible word has a
single maximal subchain of odd length.
\end{problem}
\begin{problem}
Let $L$ be a language over an alphabet $A$, and let $A'$ be a
new alphabet which extends $A$ by one additional letter. Let $L'$
be a language over $A'$ with the same list of forbidden words as
$L$. Prove that
$$
L'(x) = \frac{\d 1}{\d \frac{1}{L(x)} - x }.
$$
\end{problem}
\begin{problem}
A language $W$ is called the\/ \emph{free product} of languages $L$ and $L'$
over disjoint alphabets $A$ and $A'$ if the alphabet of $W$ is the union of the
alphabets $A$ and $A'$ and the set of forbidden words is the union of the sets
of forbidden words of~$L$ and~$L'$. Express the dimension series of the free
product $W$ in terms of the dimension series of~$L$ and~$L'$.
\end{problem}
\begin{problem}
\label{kos_dual}
Suppose that all forbidden words of a language $L$ are of two letters. Over the
same alphabet, consider another language ${M}$ whose forbidden words are all
two-letter admissible words of $L$. Prove that
$$
L(x) M (-x) = 1.
$$
\end{problem}
\clearpage
\section{Additional problems}
\begin{problem}
Prove that there exists a free set of $m$ words of length~$d$
over an alphabet of $n=kd$ letters if and only if $m\le k^d
(d-1)^{d-1}$ (cf. Problem~\ref{n=kd})
a) for $d=2$; \qquad b) for $d=3$; \qquad c) for $d >3$.
\end{problem}
\begin{definition}
A language is said to be \emph{$d$-defined} if the maximal length of its
forbidden words is~$d$. A 2-defined language is said to be \emph{quadratic}.
\end{definition}
\begin{problem}
Quadratic languages $L$ and $M$ in Problem~\ref{kos_dual} are said to be\/
\emph{dual} to each other (notation: $M=L^!$).
a) Prove that $(L^!)^! = L$.
b) Find $(L_1 + L_2)^!$.
c) Describe $(L_1 \cdot L_2)^!$.
\end{problem}
\begin{problem}
Let $L$ be a $d$-defined language. Let us define a new language $L^{(n)}$ over
the alphabet consisting of all length $n$ admissible words of~$L$ as the
language whose admissible words are all admissible words of~$L$ whose length is
a multiple of~$n$ (rewritten in the new alphabet).
a) Prove that $L^{(n)}$ is defined by finitely many forbidden words.
b) Is $L^{(n)}$ always $d$-defined?
c) For what minimal $n$, the language $L^{(n)}$ is necessarily
either quadratic or free (for all $d$-defined languages $L$)?
\end{problem}
\begin{problem}
\label{quadr_graph}
For any quadratic language $L$ over the alphabet $x_1, \dots, x_n $, let us
define an oriented graph~$\Gamma_L$ as follows: it has $n$ vertices labelled
with $x_1, \dots, x_n$, and there is an edge (an arrow) $x_i \to x_j$ if and
only if the word $x_ix_j$ is admissible. Denote the number of admissible words
of length $k$ by $a_k$. Prove that
a) the language $L$ is finite if and only if~$\Gamma_L$ has no cycles;
b) the language $L$ has polynomial growth (i.~e., there exist two
nonzero polynomials $p,q$ of the same degree~$d$ with positive
leading coefficient such that $p(k) \ge a_k\ge q(k)$ for each
$k\ge 0$) if and only if $\Gamma_L$ has a cycle but has no
intersecting cycles;
c) the language $L$ has exponential growth (i.~e., for some
$c_1>c_2>1$ and for all $k$, we have $c_1^k \ge a_k \ge c_2^k
$) if and only if $\Gamma_L$ has at least two intersecting cycles.
\end{problem}
\begin{problem}
Let $L$ and $L^!$ be a pair of dual quadratic languages. Is it possible that
both have exponential growth?
\end{problem}
\begin{problem}
For any $d$-defined language $L$ over the alphabet $x_1, \dots, x_n $, we
define the oriented graph $\Gamma_L$ as follows: its vertices are labelled with
all admissible words of length $d-1$, and there is an edge (arrow) $v\to w$ if
and only if there is a letter $x_i$ such that the word $vx_i$ is admissible and
the last~$d-1$ letters of it constitute the word $w$. Prove all properties a),
b), c) in Problem~\ref{quadr_graph} for $\Gamma_L$.
\end{problem}
\begin{definition}
Let $M$ be a set over an alphabet $A$. Words $u$ and $v$ (over the same
alphabet) are said to be \emph{$M$-equivalent} if, for any word $w$, the words
$uw$ and $vw$ either both belong to $M$ or neither of them belongs to~$M$. The
set $M$ is said to be \emph{regular} if there is a natural number $n$ such that
any set of~$n$ contains two $M$-equivalent words.
\end{definition}
\begin{problem}
Prove that the set of admissible words of any language is regular.
\end{problem}
\begin{definition}
A \emph{finite automaton} over an alphabet $A$ is an oriented graph $\Gamma$
with a finite set of vertices $V$ such that
\noindent a) the arrows are marked by the letters of the alphabet
$A$, and for every vertex $v\in V$ and each letter $a\in A$, there is a unique
arrow marked by $a$ whose tail is $v$;
\noindent b) an \emph{initial vertex } $v_0\in V$
and a \emph{set of approving vertices} $W\subseteq V$ are given.
Let us consider each word over the alphabet $A$ as an instruction for a trip by
arrows over the finite automaton $(\Gamma,v_0,W)$, that is, we begin with the
initial vertex, then go by the arrow marked by the first letter of the word,
then follow the arrow marked by the second letter of the word, and so on. We
say that the automaton \emph{approves} a word if the path corresponding to the
word ends with an approving vertex.
\end{definition}
\begin{problem}
a) Prove that for every regular set $M$ there exists a finite automaton
approving the words of $M$ and no other words.
b) Prove that for every finite automaton the set of approving words is regular.
\end{problem}
\begin{problem}
Prove that for every regular set $M$ its dimension series can we represented as
a quotient of two polynomials.
\end{problem}
\begin{problem}
Let $L$ be a language and $M_{ w}$ the set of all admissible words of~$L$ which
have a final subword equal to a given word $w$. Prove that the dimension series
of the set $M_{ w}$ can we represented as a quotient of two polynomials.
\end{problem}
\bigskip
Note. Parts 1--5 were suggested before the intermediate consideration of the problems.
Parts 6--8 were added after the intermediate consideration of the problems.
\end{document}