#!/usr/bin/perl # # Copyright (c) 1997-2024 Robert Munafo # License: Creative Commons Attribution-NonCommercial 4.0 International # Source: http://mrob.com/time/automome/automeme-pl.txt # # LICENSE # # This software, and the sample memes file that goes with it (which is at # http://mrob.com/time/automome/automeme-pl.txt ) are made available under # a Creative Commons Attribution-NonCommercial 4.0 International License. # License details are at http://mrob.com/cc-license.html use strict; # These are required for UTF-8 parsing, such as a regexp that matches any # character including non-ASCII characters. Here is an example: # # # Define a list of foreign words (this list only has one) # class foreign-words: # Πυθαγόρας # # Regex that extracts the first letter of something, or in an ms/// # # context removes all but the first letter. # regex initial-letter: /^(.).*$/$1/ # # Now define our templates # templates: # # Select a foreign word, then state " starts with ''" # [foreign-words] starts with '[$0/initial-letter/]' # # however on older OS's, like MacOS 10.4, using these 'use' statements # sometimes causes false errors from Perl: # # utf8 "\xE5" does not map to Unicode at /Users/munafo/bin/automeme # line 591, <$IN> line 3137. # # The bug seems to occur when a UTF-8 sequence straddles the boundary # between buffer-sized chunks of the input stream. It does not actually affect # the ability of #automeme# to process meme templates containing UTF-8. # Therefore, we save a copy of STDERR for use in our own error output, # and redirect STDERR itself to /dev/null use utf8; use open ':std', ':encoding(UTF-8)'; open (SERR, ">&STDERR"); # Used in reg.error() close(STDERR); open(STDERR, ">/dev/null"); my $bapropos = q@ automeme(1r) -- generate random bits of text using templates that match popular internet 'memes' @; my $g_memefile = "ott-snowclones.txt"; my $g_literal_template = ''; my $g_maxtries = 10000; my $help = qq` NAME automeme -- generate random bits of text using templates that match popular internet 'memes' DESCRIPTION Given no argument, this script just prints out a string of text that is generated using a randomly-selected template and filling in, after the style of "Mad Libs". The meme definitions are in a file called "$g_memefile" in the current directory (or in ~/data/memes if not found in curdir) ; this can be changed with the -o and -d options. OPTIONS pat - Memes matching this pattern are suitable (details below) +pat - All memes must match this pattern (details below) -pat - All memes must NOT match this pattern (details below) -c - Copy output to clipboard with #pbcopy# -d foo/bar.txt - Read the meme definitions from "foo/bar.txt". You can give any pathname; if nothing is found it prints an error and exits. You can also give just a filename (i.e. with no slash '/') , in which case it will look first in the current directory, then in ~/data/memes -Dm - Debug: show each step of substitution -ef - Errors are fatal: used with -n, #automeme# will stop as soon as any meme expansion generates an error. -ist - Identify source templates: Each meme's template will be printed along with the generated text. -lc 60 - Limit number of characters of any output lines to 60 -mt - Maximum tries: Try this many times to find at least one meme matching the filter conditions (default $g_maxtries) -n 27 - Make 27 lines of random memey goodness -o - Use the original AUTOMEME snowclone database, "automeme.txt", either in the current directory or in ~/data/memes -prefix Foo - Prepend text 'Foo' to every meme output -s - Speak output (requires 'espeak' on Linux or 'say' on Mac) -subject - Give results suitable for OTT subject lines -t '[noun]' - Instead of selecting templates at random from all of those provided in the meme definitions file, use only this string as the template. It can be just a class name in brackets, or a complete template definition. -utf - Convert small caps and underline to UTF-8 format. -v - Verbose: Print some debugging messages. -vv - Very verbose: error messages will include call-chain diagnostics. FILTER PATTERNS You may specify one or more pattern(s) that must match, or not match, for a meme to be printed. The syntax is: automeme [opts] [nominate] [+require] [-veto] [...] Keywords (actually Perl regular expressions) are of three types: Nominate, Require and Veto. The Require and Veto types are flagged by a leading '+' or '-' respectively. For a pathname to be a match, it must: match ALL of the Require patterns, AND match AT LEAST ONE of the Nominate patterns, AND match NONE of the Veto patterns. This is equivalent to a 'boolean expression' like this: match = require1 AND require2 AND ... AND (nominate1 OR nominate2 OR ...) AND (NOT (veto1 OR veto2 OR ...)) If no Nominate patterns are given, the Nominate requirement is skipped. If all patterns are of type Nominate, they will be treated as if they were all of type Require. This provides the maximum compatibility with both "Google syntax" (where '+' is assumed and there is no Nominate type) and "Alta Vista" syntax (which has all three types). To force "fuzzy Google matching" where all patterns are treated as type Nominate all the time, include "+." as one of your patterns. See the following examples. FILTER EXAMPLES Both of the following do the same thing: print one meme containing 'POPE': automeme POPE automeme +POPE Both of the following do the same thing: print a meme containing both 'POPE' and 'BOT': automeme POPE BOT automeme +POPE +BOT This prints a meme containing either 'POPE' or 'BOT' (or both). The null pattern '+.' is required, otherwise this command will do the same thing as the previous example: automeme POPE BOT +. Both of the following do the same thing: print a meme containing 'POPE' but not containing 'BOT': automeme POPE -BOT automeme +POPE -BOT This prints a meme containing either 'POPE' or 'MOME', which definitely contains 'BOT' and which does not contain 'HOW': automeme POPE MOME +BOT -HOW FILE FORMAT A meme data file can include blank lines or comments starting with '#', which will be ignored. A line starting with 'class', such as 'class adjective:', starts a list of words/phrases of a particular category. A line starting with 'templates:' begins the meme templates (snowclone definitions). The format of meme templates is demonstrated by the examples in &examples(), which are executed if you pass the arguments "-n 0". DEBUGGING To detect errors in the memes template file, use the command automeme -n 10000 -vv -ef > /dev/null This tells it to generate a lot of memes and print error messages (-vv), stopping on the first error (-ef), and discarding the good (non-error-producing) memes. SEE ALSO fortune - The classic adage generator magic-8-ball - divination via the well-known Tyco child's toy. markov1 - First-order Markov chain on ASCII text input `; my $unused_block_header = q` REVISION HISTORY 20140406 First version (cloned from #skel#). Get engine working. Convert @Link's OTT meme database into this grammar. 20140407 Fix a bug in rep.2 index calculation; allow random selection between two words with [FOO@0.27, BAR] 20140408 Add -o option. 20140409 Allow multiple regexps in history substitution; add '[foo]ms/bar/baz/' generator for defining classes based on other classes. 20140411 Fix a regexp bug; allow two fixed-probability literals 20140415 Add cut operator '[]' 20140427 UTF8 compliance. 20140505 Add -lc option. 20140723 Add multiclass syntax. 20140725 In &rep.3, check for the 'LITERAL@0.12, ' parts first, and allow three such parts. Add more syntax examples to &examples() Implement isomorph syntax. $1/foo/bar regexp syntax now allows 'g' modifier. 20140730 &rep.3 now accepts arbitrarily many random literal parts. Add an explanatory block comment on multiclass. 20140731 -v option shows an error message if an empty class is used in a replaceable context. 20140803 Add some more error detection. 20140804 Template-expansion code (functions me.meify, rep.1, rep.2 and rep.3) can now call itself recursively. 20140804 Add named regex syntax (but only for the ms// selector when building derived classes) 20140805 Support named regexes in replaceables; add more error detection. &rep.3 now tries to make sure each string acquired from a call to rep.2 is different from any previous ones (within this particular call to me.meify). 20140806 Add named templates and recursion. Add more error-checking. 20140807 More error detection 20140813 More restrictive test for 'class has no members' error 20140830 Do not need the UTF-8 "use" statements. 20140907 You can now give fixed probabilities to templates and/or classes in a rep.3 replaceable. 20140908 Add -t option. 20140909 Add -d option. 20150113 Increase recursion/complexity limit to 143 20150816 Add -ist option. 20151207 Add pattern matching (nominate/require/veto) functionality, -i option, and &match.2 20160503 Add -subject and -prefix options. 20170329 Add -c option. 20170408 Add a bit to some of the error messages. 20170414 Examples now show both the templates and their output. 20170427 New subject length limit is 120 characters 20171207 Improved error messages for malformed one-line template definitions 20171219 &rep.3 now allows multiple-choice alternatives to use history variables. In the process I have deprecated "[FOO@0.2, noun, place]" in favour of "[FOO@0.2, noun@0.5, place]"; I update examples() to reflect this. 20171222 Errors regarding meme syntax, stc. now go to STDERR 20171223 "[FOO@0.2, noun, place]" syntax is once again supported; 20171225 Fix a bug in rep.2: commas no longer removed if no defined names were found. 20171227 Add tag.vv, report top-level source line numbers on most error messages. 20171228 Fix bugs in traceback reporting; add -ef and -vvv options. 20171229 Add -Dm option 20171230 Slight improvements to -Dm output. 20180101 Fix a bug in &mc.setup that was redefining e.g. mc_isomorphs{'verb-tr-perfect/-past'} = 'ofc_thing-past' which would have caused really confusing problems if any memes had ever tried to isomorph off the 3rd or later variant of a multiclass member. 20180102 Multiclass unions are now supported. 20180104 Fix a bug that was returning null string when picking a member of a common class whose class name was also the basename of a multiclass. 20180106 -t option need not be in brackets, if it is a single class or template name: "-t [whatif]" is now equivalent to "-t whatif" 20180327 Add -s option. 20180329 Add parsing of 'pronounce' directive, but don't do anything with it yet. 20220424 -utf now does italics; fix a bug in the smallcaps filter 20220503 Fix -s option to remove characters like '*' before passing to #say# or #espeak# 20220513 Add keyword 'class1w' to specify simple classes (word lists) using whitespace-separated text (rather than the newline separation required by 'class') 20220521 Fix error that caused templates following a class1w to be parsed into pieces (and each treated as a separate template) 20220822 Add -mt option. BUGS and TO-DO Add a pattern option: -s 'FOO.*BAR' - "Search" for memes matching the given pattern. #automeme# will try to determine if the pattern is impossible and report an error immediately. To detect if a pattern is impossible, extract all literal substrings from the pattern and grep the meme definitions file for them. For example, if "FOO" doesn't appear verbatim in a class or template, it can only result from catenation or regex manipulation. Use named templates to improve the Doge meme (as illustrated below) MULTICLASS and ISOMORPHS A *multiclass* is a set of classes whose members have a one-to-one correspondence with each other. Here is an example of a multiclass defining singular and plural nouns: multiclass noun[-singular, -plural]: CAT | CATS FISH | FISH GOOSE | GEESE This defines classes "noun-singular" and "noun-plural". Each noun has singular and plural forms, called "declensions". It is also useful to define a multiclass for verbs, in which case we have "conjugations": multiclass verb-transitive[-present, -past]: CARRY | CARRIED SEE | SAW THROW | THREW The general term for declension and conjugation is "inflection", so each item in a line is a different inflection of the words next to it: "CATS" is the plural inflection of "CAT" and "SAW" is the past inflection of "SEE". However, a multiclass is useful for things that are not inflections. Consider the following examples: multiclass animal[-singular, -plural, -collective]: CAT | CATS | CLOWDER COW | HERD | CATTLE FISH | FISH | SCHOOL GOOSE | GEESE | FLOCK multiclass vehicle[-singular, -operator]: BICYCLE | RIDER CAR | DRIVER SHIP | CAPTAIN TRAIN | ENGINEER Here we use the multiclass to define *isomorphisms*, such as "FISH is to SCHOOL as CAT is to _______" (what is the collecive noun for cats?) and "CAR is to DRIVER as TRAIN is to _______" (what do you call the driver of a train?). The different words in a row cannot be called "inflections" so intead I call them "isomorphs". To use a multiclass, first invoke a class member using its normal class name, then get an isomorph using the history variable, a slash, and the ending of the class name for the isomorph. For example, the template: I SAW A [animal-collective] OF [$0/-plural]. which might generate the sentence "I SAW A SCHOOL OF FISH." NAMED TEMPLATES A template can be given a name via label, allowing it to be invoked the same way a class is named (as an option within a replaceable). Labels must contain no spaces or other odd punctuation; if you want a literal colon you can use %3a. To use the labeled template as a macro/substring of a longer template, just invoke its label inside brackets as if it were a class, e.g. "foo bar [baz]" By default, a template defined this way exists *only* for use within another template; to remind you of this the template's definition must end with a cut []. To use the named template, use the syntax: class noun: ... class verb-inf-1word: ... templates: template decree01: TAKE THIS [noun] AND [verb-inf-1word] IT[] [decree01] template decree02: KEEP CALM AND [verb-inf-1word] ON[] [decree02] template decree: [OPTIONAL @0.3, ]DECREE: [decree01, decree02][] Three templates are defined, but only the first two will appear in output because they are invoked as templates. As seen in the example above, you can invoke a set of labeled templates with "[foo, bar, baz]" which will cause one to be picked and a recursive call to rep.3. Whenever a class *or* named template is invoked as "[foo]", #automeme# keeps track of the literal result that is substituted for it, and if the same invocation appears again, it tries to avoid generating the same literal text. For example a Doge meme template could be constructed: templates: template doge1: [WOW@0.5, NEAT][] template doge2: SO [noun-singular][] template doge3: MANY [adjective][] template doge4: SUCH [verb-cont][] template doge5: VERY [noun-singular][] template doge6: MUCH [adjective][] template doge: [doge1, doge2, doge3, doge4, doge5, doge6] [doge] - [doge] - [doge] - [doge] - [doge] The five instances of "[doge]" will generate five different results; however it might (and usually will) end up making two of the same type, e.g. two MANYs. If for some reason you wish to allow duplicates to happen, you can do it this way: template dd1: [doge][] template dd2: [doge][] template dd3: [doge][] template dd4: [doge][] template dd5: [doge][] [dd1] - [dd2] - [dd3] - [dd4] - [dd5] `; my $hd = $ENV{"HOME"}; my $p_classname = "[-_a-z0-9]+"; my $p_mul_names = "[-, _a-z0-9]+"; my $g_icase = 0; my $g_identify_source_template = 0; my $g_iterations = 1; my $g_limit_characters = 9999; my $g_root_template = ''; my $g_root_lnum = 0; my $g_say = 0; my $g_vv_sptr = 0; my $g_verbose = 0; my $g_input_lnum = 0; my $g_cur_inline = ""; my $complexity; my $saybin; my @kw_veto; my $num_veto = 0; my @kw_require; my $num_require = 0; my @kw_nominate; my $num_nominate = 0; my %error_seen; my $g_tag_active = 0; my $g_error_count = 0; my @g_stack; my $g_vv_sptr = 0; my @g_l_r; my $g_Dm = 0; my $g_logall = 0; my @mfy_log; my $mlptr = 0; my @mltag; my @mlindent; my $mlnest = 0; my $mli_prev = 0; my @sptr_nest; sub reg_error { my($msg) = @_; if ($error_seen{$msg}) { $g_tag_active = 0; } else { print SERR "*** Error: $msg\n" if ($g_verbose); $g_tag_active = 1; $error_seen{$msg} = 1; } $g_error_count++; } # End of reg.error sub mladd { my($l, $j) = @_; my($t, $doit); $doit = 0; if ($mlptr == 0) { $doit = 1; } if ($l ne $mfy_log[$mlptr-1]) { $doit = 1; } if ("[$l]" eq $mfy_log[$mlptr-1]) { $doit = 0; } if ($doit) { if ($mlptr == 0) { # No need for any indentation yet } elsif ($g_vv_sptr > $mli_prev) { # go in further $mlnest++; $sptr_nest[$g_vv_sptr] = $mlnest; } elsif ($g_vv_sptr < $mli_prev) { $mlnest = $sptr_nest[$g_vv_sptr]; } $mlindent[$mlptr] = ". " x $mlnest; $mfy_log[$mlptr] = $l; if ($j >= 0) { $t = sprintf("%3d %2d %2d", $complexity, $g_vv_sptr, $j); } else { $t = sprintf("%3d %2d ", $complexity, $g_vv_sptr); } $mltag[$mlptr] = $t; $mlptr++; $mli_prev = $g_vv_sptr; } } # End of ml.add sub dump_mfylog { my($i); for($i=0; $i<$mlptr; $i++) { print SERR "$mltag[$i] $mlindent[$i]$mfy_log[$i]\n"; } } # Add additional info to an error message (but only if -vv option was given) sub tag_vv { my($msg) = @_; my($i, $j, $gsp, $t1, $t2, $ind); if ($g_tag_active) { # We just registered a not-before-seen type of error message if ($g_verbose > 1) { # User gave -vv option; here's where we actually need to do stuff if ($msg eq "ex") { if ($g_verbose > 2) { &dump_mfylog(); } if ($g_root_lnum == -1) { $msg = "from command line -t '$g_literal_template':"; } elsif ($g_root_lnum == -2) { $msg = "from builtin example:"; } else { $msg = "source line $g_root_lnum, anonymous template:"; } $ind = ($g_vv_sptr > 20) ? " " : " "; # %%% here we'll append the stack contents from push.vv, if any for($i=0, $j=0, $gsp = ''; $i<$g_vv_sptr; $i++) { if ($gsp eq $g_stack[$i]) { } elsif ($gsp eq ('['.$g_stack[$i].']')) { } else { $t1 = $g_l_r[$i]; $t2 = $g_stack[$i]; $t1 =~ s/ /$t2/; $msg = $msg . "\n " . ($ind x $j) . $t1; $j++; } $gsp = $g_stack[$i]; } } print SERR " : $msg\n"; } elsif ($g_verbose) { # User gave -v but not -vv, hint them print SERR " : (use -vv option for more details about this error)\n"; } } } # End of tag.vv # push.vv will add a nested message, for use in showing the recursive expansion # that led to an error sub push_vv { my($msg, $l, $r) = @_; $g_stack[$g_vv_sptr] = $msg; $g_l_r[$g_vv_sptr] = "$l $r"; $g_vv_sptr++; } sub pop_vv { if ($g_vv_sptr > 0) { $g_vv_sptr--; } } # End of pop.vv # Check a regexp pattern to determine if it is a valid Perl regexp. sub validate_regexp { my($pat, $nonfatal) = @_; my($evstr); if (" $pat)" =~ m/[^\\][\$][^|)]/) { # Allow no variables return 0; } if (" $pat" =~ m/[^\\][\%][^0-9a-fA-F]/) { # Allow no % variables except hexadecimal return 0; } if (" $pat" =~ m/[^\\][\@]/) { # Allow no @ variables return 0; } if (($pat =~ m/\|/) && ($pat =~ m|/|)) { # Cannot handle, fail ®_error("Pattern cannot use both '|' and '/': $pat"); return 0; } elsif ($pat =~ m|/|) { $evstr = "'foo' =~ m|$pat|"; } else { $evstr = "'foo' =~ m/$pat/"; } eval $evstr; if ($@) { # Failage # die "val fail /$pat/\n"; return 0; } # It must have succeeded return 1; } sub eval_repl { my($l, $from, $to, $mod) = @_; my($rv, $evstr); $rv = $l; $evstr = '$rv =~ '; if (($from =~ m/\|/) && ($from =~ m|/|)) { # Cannot handle, just return original string return $l; } elsif ($from =~ m|/|) { $evstr .= "s|$from|$to|$mod;"; } else { $evstr .= "s/$from/$to/$mod;"; } eval $evstr; if ($@) { # Failage # die "repl fail /$from/$to/\n"; return $l; } # It must have succeeded return $rv; } sub eval_match { my($l, $pat, $mod) = @_; my($rv, $evstr); $rv = 0; $evstr = '$rv = ($l =~'; if ($pat =~ m|/|) { $evstr .= "m|$pat|$mod)"; } else { $evstr .= "m/$pat/$mod)"; } eval $evstr; if ($@) { # Failage # die "match fail '$pat'\n"; return 0; } # It must have succeeded return $rv; } my %class_pop; my %class_member; my %cm_lnum; my %named_regex; my %named_template; my %nt_lnum; my %pronounce; # %%% assignments to %class_member need to also save line number in %cm_lnum # special line numbers: # -1 literal template given by -t command-line argument # -2 example in examples() # # Recursion structure is: # memeify # rep3 # avoid_rep2 # rep2 # memeify # ... # Add line of text to current class sub add1 { my($class, $l) = @_; my($key); $class_pop{$class} += 0; # Make sure this is not the null string # when defining the first element $key = "$class $class_pop{$class}"; $class_member{$key} = $l; $cm_lnum{$key} = $g_input_lnum; # print "class_member{$key} = $class_member{$key}\n"; $class_pop{$class}++; } my $g_mc_width; my @mc_names; my @mc_vars; my %is_multiclass; my %mc_isomorphs; my %multiclass_basenames; my %mc_columns; my $mc_basename; my %mcb_class_pop; # Define the names of each of the classes in a multiclass. sub mc_setup { my($base, $vars) = @_; my($name, $b2, $var); undef @mc_names; undef @mc_vars; $g_mc_width = 0; $vars =~ s/ //g; foreach $var (split(/,/, $vars)) { $name = $base . $var; $mc_names[$g_mc_width] = $name; # print "mc_names[$g_mc_width] = $name\n"; $is_multiclass{$name} = $g_input_lnum; $mc_vars[$g_mc_width] = $var; $g_mc_width++; } $mc_columns{$base} = $vars; # print "mc_columns{$base} = $vars\n"; # Create the isomorph mappings for $b2 (@mc_names) { for $var (@mc_vars) { $mc_isomorphs{"$b2/$var"} = $base . $var; # print "mc_isomorphs{'$b2/$var'} = '$base$var'\n"; } } $multiclass_basenames{$base} = 1; $mc_basename = $base; } # End of mc.setup # Add a list of items to all the classes in the current multiclass. The input # is a line like "FOO | BAR | | BAZ" where the items are separated by # '|'. Any blank items will cause empty strings to be put into the corresponding # sub mc_add { my($l) = @_; my($i, $class, $key, $var); $i = 0; foreach $var (split(/\|/, $l)) { if ($i < $g_mc_width) { $var =~ s/^ +//; $var =~ s/ +$//; # print "mcs $i $var\n"; $class = $mc_names[$i]; $class_pop{$class} += 0; $key = "$class $class_pop{$class}"; $class_member{$key} = $var; # print "class_member{'$key'} = '$var'\n"; $cm_lnum{$key} = $g_input_lnum; $class_pop{$class}++; $i++; } } # Make sure blanks are put in any columns that were left out while ($i < $g_mc_width) { $class = $mc_names[$i]; $key = "$class $class_pop{$class}"; $class_member{$key} = ''; $cm_lnum{$key} = $g_input_lnum; $class_pop{$class}++; $i++; } # Also need to remember population for the basename $mcb_class_pop{$mc_basename}++; } # End of mc.add # Load data for vocabulary and snowclone templates sub load_data { my($dir, $fn, $fp, $l, $class, $key); my($vocab, $mclass, $mdim, $p1, $p2, $cl1word, $w1); # %%% In future the meme database could be selected by an environment # variable, command-line option and/or rc file. $fn = "$g_memefile"; # Look for database in curdir or in ~/data/memes if ($fn =~ m|^/|) { # User gave absolute path $fp = $fn; } else { # Relative path: look in . or in ~/data/memes $dir = "."; if (!(-f "$dir/$fn")) { $dir = "$hd/data/memes"; } $fp = "$dir/$fn"; } die "No file '$fp'\n" if (!(-f $fp)); print "Reading meme definitions from $fp\n" if ($g_verbose); open(my $IN, $fp); $g_input_lnum = 0; while($l = <$IN>) { chomp $l; $g_cur_inline = $l; $g_input_lnum++; $l =~ s/[\t]/ /g; $l =~ s/^ +//; $l =~ s/ +$//; $l =~ s/ +/ /g; if ($l eq '') { # Blank line -- ignore } elsif ($l =~ m/^#/) { # Comment delimiter -- ignore } elsif ($l =~ m/^class +($p_classname):$/i) { # Switch to class mode, begin adding elements to a class $class = $1; $vocab = 1; $mclass = ''; $cl1word = 0; } elsif ($l =~ m/^class1w +($p_classname):$/i) { # Switch to class mode, set 1-word flag $class = $1; $vocab = 1; $mclass = ''; $cl1word = 1; } elsif ($l =~ m/^multiclass +($p_classname) *\[($p_mul_names)\]:$/i) { # Switch to multiclass mode, begin adding elements to a multiclass $mclass = $1; $mdim = $2; # print "mc.setup($mclass, $mdim)\n"; &mc_setup($mclass, $mdim); $vocab = 1; $class = ''; $cl1word = 0; # Suppress adding to simple class } elsif ($l =~ m|^pronounce: */([^/]+)/([^/]+)/ *$|i) { $p1 = $1; $p2 = $2; if (&validate_regexp($p1)) { $pronounce{$p1} = $p2; } else { ®_error("Could not validate pronounce regex /$p1/"); &tag_vv("in line $g_input_lnum:\n $g_cur_inline"); } } elsif ($l =~ m|^regex +($p_classname): *(/.+/.*/[gi]*) *$|i) { # Named regex definition (allowed within any mode) $p1 = $1; $p2 = $2; $named_regex{$p1} = $p2; } elsif ($l =~ m|^regex +($p_classname)|i) { ®_error ("Malformed regex definition '$1'"); &tag_vv("in line $g_input_lnum:\n $g_cur_inline"); } elsif ($l =~ m|^regex +|i) { ®_error ("Malformed regex declaration"); &tag_vv("in line $g_input_lnum:\n $g_cur_inline"); } elsif ($l =~ m|^template +($p_classname): +(.*)\[\] *$|i) { # Named template definition (allowed within any mode) $p1 = $1; $p2 = $2; if ($named_template{$p1} ne "") { ®_error("template '$p1' is already defined"); &tag_vv("in line $g_input_lnum:\n $g_cur_inline"); } elsif ($p2 =~ m/\[\].*\[\]/) { ®_error("Template may have only one cut '[]' and one terminal '[]'"); &tag_vv("in line $g_input_lnum:\n $g_cur_inline"); } $named_template{$p1} = $p2; $nt_lnum{$p1} = $g_input_lnum; # print "named_template{$p1} = $p2\n"; } elsif ($l =~ m|^template +($p_classname): +(.*)\[\]|i) { ®_error("one-line 'template' definition should have nothing after final '[]'"); &tag_vv("in line $g_input_lnum:\n $g_cur_inline"); } elsif ($l =~ m|^template +($p_classname): +(.*)|i) { ®_error("one-line 'template' definition should have '[]' at the end"); &tag_vv("in line $g_input_lnum:\n $g_cur_inline"); } elsif ($l =~ m|^template +($p_classname) +:|i) { ®_error("one-line 'template' must not have blank space between classname and ':'"); &tag_vv("in line $g_input_lnum:\n $g_cur_inline"); } elsif ($l =~ m|^template +($p_classname)|i) { ®_error("one-line 'template' definition should have ':' after the classname"); &tag_vv("in line $g_input_lnum:\n $g_cur_inline"); } elsif ($l =~ m|^template |i) { ®_error("'template' must be followed by a classname"); &tag_vv("in line $g_input_lnum:\n $g_cur_inline"); } elsif ($l =~ m/^templates:$/) { # Switch to anonymous templates mode, begin adding anonymous templates $class = 't:'; $vocab = 0; $mclass = ''; $cl1word = 0; } elsif ($class ne '') { # Add an item to current class (this includes word classes, and the # special class 't:' for anonymous templates) my($fcl, $from, $to, $i, $key, $mod); if ($vocab && ($l =~ m|^\[($p_classname)\] *ms/([^/]+)/(.*)$|)) { # Bulk-add members of another class, with regex substitution $fcl = $1; $p1 = $2; $p2 = $3; if ($named_regex{$p1} ne '') { $p1 = $named_regex{$p1}; } else { $p1 = "/$p1/$p2"; } $p1 =~ tr/{}/[]/; if ($p1 =~ m|/([^/]+)/([^/]*)/(i?)|) { $from = $1; $to = $2; $mod = $3; # print "foo1 |$fcl|$from|$to|\n"; if ($class_pop{$fcl} <= 0) { ®_error("Class [$fcl] has no elements"); &tag_vv("in line $g_input_lnum:\n $g_cur_inline"); } elsif (&validate_regexp($from)) { for($i=0; $i<$class_pop{$fcl}; $i++) { $key = "$fcl $i"; $l = $class_member{$key}; if (&eval_match($l, $from, $mod)) { # print "foo1 '$l' s/$from/$to/$mod"; $l = &eval_repl($l, $from, $to, $mod); # print "-> '$l'\n"; &add1($class, $l); } else { # print "foo4 '$l' does not match /$from/\n"; } } } else { ®_error("Could not validate regex /$from/ (a)"); &tag_vv("in line $g_input_lnum:\n $g_cur_inline"); } } else { ®_error("Could not parse regex $p1"); &tag_vv("in line $g_input_lnum:\n $g_cur_inline"); } } elsif ($vocab && ($l =~ m|^\[($p_classname)\] *$|)) { # Bulk-add members of another class, without modification $fcl = $1; if ($fcl eq $class) { ®_error("Class '$fcl' cannot include itself"); &tag_vv("in line $g_input_lnum:\n $g_cur_inline"); } elsif ($class_pop{$fcl} <= 0) { ®_error("Cannot include '$fcl' because it has no members"); &tag_vv("in line $g_input_lnum:\n $g_cur_inline"); } else { for($i=0; $i<$class_pop{$fcl}; $i++) { $key = "$fcl $i"; $l = $class_member{$key}; &add1($class, $l); } } } elsif ($cl1word && ($class ne 't:')) { # Add a group of single items separated by whitespace foreach $w1 (split(/ /, $l)) { if ($w1 ne '') { &add1($class, $w1); } } } else { # Add a single item: plain word/phrase or anonymous template. if ($class eq 't:') { # Adding an anonymous template if ($l =~ m/\[\] *$/) { ®_error("Anonymous template should not end in '[]'"); &tag_vv("in line $g_input_lnum:\n $g_cur_inline"); } elsif ($l =~ m/^.*\[\].*\[\]/) { ®_error("Anonymous template may have only one '[]'"); &tag_vv("in line $g_input_lnum:\n $g_cur_inline"); } } &add1($class, $l); } } elsif ($mclass ne '') { my($fcl, $col, $n); # Add to current multiclass if ($l =~ m|^\[($p_classname)\] *$|) { $fcl = $1; # ®_error("Cannot add '$fcl' to '$mclass' because unions are not yet supported"); # &tag_vv("in line $g_input_lnum:\n $g_cur_inline"); $n = $mcb_class_pop{$fcl}; if ($multiclass_basenames{$fcl} == 0) { ®_error("Cannot include '$fcl' because it is not a multiclass"); &tag_vv("in line $g_input_lnum:\n $g_cur_inline"); } elsif ($n <= 0) { ®_error("Cannot include '$fcl' because it has no members"); &tag_vv("in line $g_input_lnum:\n $g_cur_inline"); } else { # print "union $n members of '$fcl' into '$mclass'\n"; foreach $col (split(/,/, $mc_columns{$mclass})) { my($fclass, $fcpop, $fcx, $tclass, $tcpop, $i); $fclass = "$fcl$col"; $fcpop = $class_pop{$fclass} + 0; $tclass = "$mclass$col"; $tcpop = $class_pop{$tclass} + 0; $fcx = $is_multiclass{$fclass}; if (!($fcx)) { ®_error("Cannot add '$fcl' to '$mclass' because the former has no variant '$col'"); &tag_vv("in line $g_input_lnum:\n $g_cur_inline"); } # Even if error, we add anyway so that the populations of each # variant stay in sync # print "$fcx add $fcpop members of [$fclass] to [$mclass$col] starting at $tcpop\n"; for($i=0; $i<$n; $i++) { &add1("$mclass$col", $class_member{"$fclass $i"}); } } } } else { # Add a single row &mc_add($l); } } else { # No mode has been set, so we don't know what type of object is # being added ®_error("Non-comment outside 'class:', 'class1w:', 'multiclass:' or 'templates:'"); &tag_vv("in line $g_input_lnum:\n $g_cur_inline"); } } close $IN; } # End of load_data # variables used within me.meify, and re-instantiated by me.meify when # it calls itself recursively. See www.perlmonks.org/?node_id=66677 our $g_rep2_index; our $g_rep2_class; our @g_subst_history; our $n_meme_substitutions; our @g_sh_index; our @g_sh_class; our @g_sh_oa; our %rep2_results; my $base_template; # me.meify takes a full meme template and expands it. Templates can invoke labeled sub-templates. The final result is returned as a string. # me.meify does not perform the final step of un-escaping %% literals. sub memeify { my($meme) = @_; my($j, $r1); # Locals are for communication between the nested subroutines local $g_rep2_index; local $g_rep2_class; local @g_subst_history; local $n_meme_substitutions; local @g_sh_index; local @g_sh_class; local @g_sh_oa; local %rep2_results; # print SERR sprintf("%3d %2d '%s'\n", $complexity, $g_vv_sptr, $meme); # print "foo1 gsh[0] == $g_subst_history[0]\n"; &push_vv($meme, '"', '"', "memeify"); # Return a single substitution from a given class, chosen at random sub rep1 { my($class) = @_; my($key, $i, $t, $l); $i = int($class_pop{$class} * rand); $key = "$class $i"; $t = $class_member{$key}; $l = $cm_lnum{$key}; return (($t, $l)); } # End of rep.1 sub mf_named { my($tnam) = @_; my($cls); &push_vv($tnam, '[', ']', "mf_nameed"); if ($g_logall) { &mladd($tnam, -1); } $cls = &memeify($named_template{$tnam}, $nt_lnum{$tnam}); &pop_vv(); return $cls; } # End of mf.named # rep.2 takes a list of classes OR a list of named templates, and either # selects a class member or selects and executes a template. # # In the first form, it returns a substitution chosen from one or more # classes given a list of those classes' names. The distribution # will be even by individual class-member, not by class. For # example, if you give the classes "fruits, vegetables" and if there # are 14 vegetables and 11 fruits, then this function will return a # vegetable 14/(14+11) = 56% of the time. # # In the second form, it chooses a named template given a list of template # names, then executes the selected template by making a single recursive # call to mf.named. In this instance, each named template has an equal # probability of being chosen. # # The list passed here must consist entirely of (multi-)classes OR of # templates, with no literal text items and no probability tags. All of # these limitations are removed by rep.3. sub rep2 { my($classes) = @_; my($cls, $tpop, $ncls, $i, $na, $nr, $recurse, $oc); my(@bases); my(@cnames); &push_vv($classes, '[', ']', "rep2"); $oc = $classes; $classes =~ s/,/ /g; $classes = " $classes "; $ncls = 0; $tpop = 0; $recurse = 0; # print "r2a classes '$classes'\n"; foreach $cls (split / /, $classes) { if ($cls eq '') { # No problem, skip to the next thing } elsif ($cls =~ m/^$p_classname$/) { # print "r2b cls '$cls'\n"; if ($class_pop{$cls}) { # print "r2b1 vocab '$cls'\n"; if ($recurse) { ®_error("Cannot mix named templates and literal-classes in a pick-list"); &tag_vv("ex"); } else { $cnames[$ncls] = $cls; $bases[$ncls] = $tpop; $tpop += $class_pop{$cls}; # print "pop($cls) == $class_pop{$cls}, tpop now $tpop\n"; $ncls++; } } elsif ($multiclass_basenames{$cls}) { # print "r2b2 multiclass basename '$cls'\n"; ®_error("'$cls' is a multiclass basename, variant missing."); &tag_vv("ex"); } elsif ($named_template{$cls} ne '') { # print "r2b3 template '$cls'\n"; if ($tpop > 0) { ®_error("Cannot mix literal-classes and named templates in a pick-list"); &tag_vv("ex"); } else { $recurse = 1; $cnames[$ncls] = $cls; # print "cnames[$ncls] = '$cls'\n"; $ncls++; } } elsif (($cls =~ m/[a-z]/) && ($cls eq lc($cls))) { # print "r2b4 undefined '$cls'\n"; if ($is_multiclass{$cls}) { ®_error("rep2: class/template '$cls' (defined at line $is_multiclass{$cls}) has no members"); &tag_vv("ex"); } else { ®_error("rep2: class/template '$cls' is undefined"); &tag_vv("ex"); } } } else { ®_error("rep2: '$cls' is not a valid class name"); &tag_vv("ex"); } } if ($recurse) { # Pick one at random $na = int($ncls * rand); # Retrieve the template and me.meify it! # $cls = &memeify($named_template{$cnames[$na]}, $nt_lnum{$cnames[$na]}); $cls = &mf_named($cnames[$na]); &pop_vv(); return $cls; } if ($ncls == 0) { # We found none, so we'll just return the (original) full string &pop_vv(); return $oc; } $na = int($tpop * rand); # Find out what class we selected $i=0; while (($na>=$bases[$i+1]) && ($i+1 < $ncls)) { $i++; } # Convert the absolute index into an index relative to the selected class $nr = $na - $bases[$i]; $cls = $cnames[$i]; # If we're drawing from a member of a multiclass, it makes sense to # remember the class name and index for use in later isomorph # substitution if ($is_multiclass{$cls}) { $g_rep2_index = $nr; $g_rep2_class = $cls; } else { $g_rep2_index = -1; $g_rep2_class = ''; } # print "foo1 $cls: ncls $ncls tpop $tpop na $na nr $nr\n"; # We're done! &pop_vv(); # my ($k) = "$cls $nr"; print "rep2: class_member{'$k'} == $class_member{$k}\n"; return $class_member{"$cls $nr"}; } # End of rep.2 # Call rep.2 with a repetition-avoidance loop. Results are considered # 'duplicate' if the result (output of rep.2) is identical to a # previous rep.2 result, regardless of whether the argument (input to # rep.2) is the same or different. sub avoid_rep2 { my($arg) = @_; # We iteratively call &rep2 until it manages to return a result that # is distinct from any other &rep2 calls within this instance of # &me.meify. To avoid infinite looping, we try at most 27 times. my($avoid_count) = 27; my($rv); # print "ar2 arg '$arg'\n"; &push_vv($arg, '[', ']', "avoid_rep2"); while ($avoid_count > 0) { $rv = &rep2($arg); if (($rep2_results{$rv}+0) == 0) { # We found a new one! $avoid_count = 0; } else { $avoid_count--; } } $rep2_results{$rv} = 1; &pop_vv(); return $rv; } # End of avoid.rep.2 # Return a replacement for one of the list-type "bracketed" substitutions # defined by the automeme grammar (see FILE FORMAT section in block comment # above, and ex/amples in &ex.amples() routine below) sub rep3 { my($arg) = @_; my ($i, $rv, $from, $to, $rs, $mod, $gh_index, $gh_class, $oa, $t); my ($multisubst, $p1, $p2, $p3); my($extra_push) = 0; # print "foo2 gsh[0] == $g_subst_history[0]\n"; &push_vv($arg, '[', ']', "rep3"); $rv = ''; $oa = $arg; $gh_index = -1; $gh_class = ''; if ($arg eq '') { # Cut: send cut signal back to caller &pop_vv(); return '[]'; } # print "r31 arg '$arg'\n"; $i = 0; $multisubst = 0; while ($arg =~ s/^([^\@]*)\@([.0-9]+), *//) { $p1 = $1; $p2 = $2; $multisubst = 1; # print "r3ms p1 '$p1' p2 '$p2'\n"; if ((rand() < $p2) && ($i == 0)) { # Return initial string (a template, class, or literal) # with the fixed probability $rv = $p1; $i = 1; } # Else, we'll continue pulling off /FOO@0.123, / pieces... } if ($multisubst) { # This block was a multiple-choice with frequencies if ($i == 0) { # It was the last one, copy from $arg $rv = $arg; } # In all cases, we allow a class or template name # %%% recursion passes through here; callers checked in rep.2 if ($rv =~ m/\[/) { ®_error("Nested bracket in pick-list item '$rv'"); &tag_vv("ex"); } elsif (($rv =~ m/,/) || ($class_pop{$rv} > 0) || ($named_template{$rv} ne '')) { $rv = &avoid_rep2($rv); # Remember the class name and item index for later isomorphism $gh_index = $g_rep2_index; $gh_class = $g_rep2_class; } # now substitute numbered 'variables' while ($rv =~ m|^(.*)\$([0-9]+)(.*)$|) { $p1 = $1; $i = $2; $p3 = $3; if ($i >= $n_meme_substitutions) { ®_error("There is no '\$$i' in history (a)"); &tag_vv("ex"); $rv = $p1 . " " . $p3; } else { $rv = $p1 . $g_subst_history[$i] . $p3; } } } elsif ($arg =~ m|^\$([0-9]+) *(/.*/.*) *$|) { # numbered 'variable' with (one or more) regexp replacement(s) # demarcated by at least two slash characters '/' $i = $1; $rs = $2; # Check for named regex if ($rs =~ m|^/($p_classname)/$|) { # With just two slashes, we expect a named regex. $t = $1; if ($named_regex{$t} ne '') { $rs = $named_regex{$t}; $extra_push++; &push_vv($rs, 'regex: ', '', "named_regex"); } elsif ($g_sh_class[$i] ne '') { # Perhaps they're trying to do an isomorph ®_error("I got '/$t/', but isomorph syntax has no trailing slash"); &tag_vv("ex"); } else { ®_error("No named regex '$t'"); &tag_vv("ex"); } } else { # This is okay, it's probably a literal regex. # ®_error("'$rs' is not of the named-regex form"); } if ($i >= $n_meme_substitutions) { ®_error("There is no '\$$i' in history (b)"); &tag_vv("ex"); $rv = " "; } else { $rv = $g_subst_history[$i]; } $rs .= ', '; # print "rep.3 |$arg|$rv|$rs|\n"; # Bite off the grep-forms one at a time. Each is expected to # be of the form /foo/bar/x where the match 'foo' must be non-null, # and the trailing modifiers 'x' can include 'g' or 'i' but nothing # else. while ($rs =~ m|^/([^/]+)/([^/]*)/([gi]*), *(.*)$|) { $from = $1; $to = $2; $mod = $3; $rs = $4; $from =~ tr/{}/[]/; # print "rep.3 '$rv' /$from/$to/"; if (&validate_regexp($from)) { $rv = &eval_repl($rv, $from, $to, $mod); } else { ®_error("Could not validate regex /$from/ (b)"); &tag_vv("ex"); } # print " -> '$rv'\n"; } # See if we are left with anything; this is a syntax error. if ($rs ne '') { ®_error("Malformed or partial regex '$rs'"); &tag_vv("ex"); } } elsif ($arg =~ m|^\$([0-9]+) */([^/]+)$|) { # numbered 'variable' with isomorph substitution $i = $1; $rs = $2; # print "foo $i $g_sh_class[$i]\n"; if ($i >= $n_meme_substitutions) { ®_error("There is no '\$$i' in history (c)"); &tag_vv("ex"); $rv = " "; } elsif ($g_sh_class[$i] eq '') { # We cannot isomorph, just return the original $rv = $g_subst_history[$i]; if ($named_regex{$rs} ne '') { ®_error("In '[\$$i/$rs]', the named regex needs a trailing slash"); &tag_vv("ex"); } else { ®_error("'$g_sh_oa[$i]' is not a multiclass member: rep.3('$oa')"); &tag_vv("ex"); } } else { # Get isomorph class $rv = $mc_isomorphs{"$g_sh_class[$i]/$rs"}; # print "set rv to mc_isomorphs{'$g_sh_class[$i]/$rs' => $rv\n"; if ($rv eq '') { # They asked for a nonexistent isomorph $rv = $g_subst_history[$i]; # print "foo-err2\n"; if ($named_regex{$rs} ne '') { ®_error("Named regex '$rs' needs a trailing slash"); &tag_vv("ex"); } else { ®_error("'$g_sh_class[$i]' has no isomorph '$rs'"); &tag_vv("ex"); } } else { $rv = $class_member{"$rv $g_sh_index[$i]"}; } } } elsif ($arg =~ m|^\$([0-9]+) *$|) { $i = $1; # numbered 'variable': return the Nth substitution from those we have # generated so far for this meme. We return directly so as not to # define another history item, which would be redundant. if ($i >= $n_meme_substitutions) { ®_error("There is no '\$" . "$i' in history (d)"); &tag_vv("ex"); $rv = " "; } else { $rv = $g_subst_history[$i]; } if ($extra_push) { &pop_vv(); $extra_push = 0; } &pop_vv(); return $rv; } elsif ($arg =~ m|^\$([0-9]+)(.*)$|) { ®_error("Unknown modifier '$2' on history variable"); &tag_vv("ex"); } else { # default case: $arg is a list of one or more templates or classes # implementation differs depending on whether equal weight is given per # class or per class-member. # %%% recursion passes through here; callers checked in rep2 $rv = &avoid_rep2($arg); # Remember the class name and item index for later isomorphism $gh_index = $g_rep2_index; $gh_class = $g_rep2_class; } # This bracketed thing defines a new history variable. Each history variable # remembers its expansion, its class name and item index (in cases where the # variable was created by instantiating a class) and the original argument # to rep.3 for use in error reporting. $g_subst_history[$n_meme_substitutions] = $rv; $g_sh_index[$n_meme_substitutions] = $gh_index; $g_sh_class[$n_meme_substitutions] = $gh_class; $g_sh_oa[$n_meme_substitutions] = $oa; $n_meme_substitutions++; if ($extra_push) { &pop_vv(); $extra_push = 0; } &pop_vv(); return $rv; } # End of rep.3 $complexity++; if ($complexity > 143) { ®_error("Recursive template too complex: $base_template"); &tag_vv("ex"); &pop_vv(); return ""; } # Do it! # # The main loop of meme.ify() is effectively performing a Markov algorithm # (see en.wikipedia.org/wiki/Markov_algorithm) which takes a starting string # and iteratively replaces substrings with expansions. The expansion can all # be done in a single Perl expression: # # $meme =~ s/\[([^]]+)\]/&rep3($1)/ge; # # but we instead code the loop explicitly, to permit logging. $j = 0; while (($j < 143) && ($meme =~ m/\[([^]]+)\]/)) { $r1 = $1; if ($g_logall) { &mladd($meme, $j); } $r1 = &rep3($r1); $meme =~ s/\[([^]]+)\]/$r1/e; $j++; } if ($j >= 143) { ®_error("Markov substitution looped too long, from: $base_template"); &tag_vv("ex"); # %%% here it would be more useful to the user to display the mfy_log } else { if ($g_logall) { &mladd($meme, $j); } } # If there was a cut, remove all the material prior to it. (This also removes # multiple cuts, e.g. in "template foo: [bar][][baz][]qux $0 quux $1[]", but # that syntax is presently blocked by the input scanner) if ($meme =~ m/^.*\[\]/) { $meme =~ s/^.*\[\]//; if ($g_logall) { &mladd($meme, -2); } } &pop_vv(); return $meme; } # End of me.meify # This is a top-level routine (not involved in the recursion). It # calls meme.ify() and performs %-escape translation on the result. sub mfy2 { my($template) = @_; my($rv); &push_vv($template, '[', ']', "mfy2"); $complexity = 0; $mlptr = 0; $mli_prev = 0; $mlnest = 0; $base_template = $template; $rv = &memeify($template); # De-escape any special literals $rv =~ s/\%([0-9a-fA-F]{2})/chr(hex($1))/ge; &pop_vv(); return $rv; } # End of mfy.2 sub xmp1 { my($t) = @_; print "\n"; print "Template: $t\n"; $g_root_lnum = -2; $g_vv_sptr = 0; # &mladd("Builtin example '$t'", -3); $t = &mfy2($t); if ($g_Dm) { &dump_mfylog(); } print (" Result: " . $t . "\n"); } sub examples { # Pass "-n 0" as arguments to see these demonstration test cases print "Meme examples:\n"; # Return an adjective &xmp1("[adjective]"); # Return a present-continuous (aka present progressive) or past-tense verb &xmp1("[verb-tr-cont, verb-tr-past]"); # Return a random noun, but half the time it's HODOR. In Perl # @ inside "" is special so we use '' &xmp1('[HODOR@0.5, noun-singular]'); # Return NORTH 1/4 of the time, EAST 1/3 of the remaining time, # SOUTH 1/2 of the remaining time, and WEST the rest of the time. &xmp1('[NORTH@0.25, EAST@0.3333, SOUTH@0.5, WEST]'); # Example of a full expansion &xmp1('I CAN [verb-tr-inf] [HOTDOG@0.3, noun-singular] PLEEZ?'); # This one uses the % escape character to include literal brackets [] in the result &xmp1('%5b[CITATION@0.3, noun-singular, noun-mass] [NEEDED@0.3, verb-tr-past]%5d'); # A few examples that use history substitition &xmp1('I PUT [noun-mass] IN YOUR [$0]...'); # This snowclone performs a sort of Pig Latin transformation. The prototype # is "Bless you, ants. Blants." but we want it to work on plural nouns # that start with a consonant. We use a regexp to remove any string of zero # or more non-vowels from the beginning of $0. Note the use of braces {} # to represent a character-class within the regexp. &xmp1('BLESS YOU, [noun-mp, character]. BL[$0/^{^AEIUO}+//].'); # Here we use a regexp to remove the article (or any initial word) from a # noun-singular-article &xmp1('I HAZ [noun-singular-article]. U CAN HAS MY [$0/^{^ }+ //].'); # For the "Dial M for murder" template we need to choose the full word # 'murder' before being able to extract its first letter. To achieve this # we use the cut operation '[]', which causes the parser to evaluate # the preceding portion and then discard it. &xmp1('[noun-all@0.67, character][]DIAL [$0/^(.).*/$1/] FOR [$0]'); # Another example, where we select a [place] and then remove any initial 'THE' &xmp1('[place][]PH\'NGLUI MGLW\'NAFH [character] [$0/^THE //] WGAH\'NAGL FHTAGN'); # A sometimes flawed attempt at verb conjugation. The prototype is # "Hating haters gonna hate", and we attempt to transform the continuous # verb form ("hating") into the plural actor form ("haters") by matching the # ING and changing it to ERS. We than attempt to form the infinitive ("hate") # by simply removing ING, but that doesn't always work. Here we show a few # exceptions &xmp1('[verb-tr-cont, verb-intr-cont] [$0/YING/IERS/, /ING/ERS/] GONNA [$0/ING//, /LLAT$/LLATE/, /DOODL /DOODLE /, /HOTDOGG/HOTDOG/]'); # Demonstrating the avoidance algorithm. &xmp1("[avoid] [avoid] [avoid] [avoid]"); # Another attempt at verb transformation &xmp1('[verb-intr-cont][verb-intr-cont][$1/ING/IN\'/][]THEY SEE ME [$0/ING/IN\'/], THEY [HATIN\'@0.67, $2]'); # Using a multiclass &xmp1('I WEAR [noun-mineral-art] NOW. [$0/-plural] ARE COOL.'); # Use a multiclass to get a plural form, then extract the bare singular from # the singular isomorph &xmp1('[noun-animal-plural, noun-mineral-plural][$0/-art][$1/^{^ }+ //][][$0] FOR THE [$2] GOD!'); # Two-level recursion &xmp1("[test_recurse1] ARE [test_recurse2] [noun]."); # Recursion with local variables at both levels: The main meme and the sub-template '[verbmeme]' both use $0, and the main meme extracts parts of the text returned by the subroutine. &xmp1('[character][character][][$1] TURNED TO LOOK AT [$0]. I ASKED [$0], AND XE ANSWERED, "[verbmeme]". "REALLY, [$0]?" I REPLIED, "[$2/^.* ({^ }+)$/$1/]? WHY NOT [verb-tr-inf] THEM?" [$1] LOOKED BACK AT ME AND REPLIED, "BECAUSE [$4/-cont] [$2/^.* ({^ }+):.*$/$1/] IS [****DISH@0.5, adjective-negative]."'); } # End of exam.ples # Small caps using Unicode my %sc_map = ( 'A' => 'ᴀ', 'B' => 'ʙ', 'C' => 'ᴄ', 'D' => 'ᴅ', 'E' => 'ᴇ', 'F' => 'ꜰ', 'G' => 'ɢ', 'H' => 'ʜ', 'I' => 'ɪ', 'J' => 'ᴊ', 'K' => 'ᴋ', 'L' => 'ʟ', 'M' => 'ᴍ', 'N' => 'ɴ', 'O' => 'ᴏ', 'P' => 'ᴘ', 'Q' => 'ϙ', 'R' => 'ʀ', 'S' => 'ꜱ', 'T' => 'ᴛ', 'U' => 'ᴜ', 'V' => 'ᴠ', 'W' => 'ᴡ', 'X' => 'х', 'Y' => 'ʏ', 'Z' => 'ᴢ' ); sub smallcaps { my($s) = @_; my($rv, $c, $m); $rv = ''; foreach $c (split //, $s) { $m = $sc_map{$c}; $rv .= ($m eq '') ? $c : $m; } return $rv; } # End of small.caps # Italics using Unicode my %it_map = ( 'A' => '𝐴', 'B' => '𝐵', 'C' => '𝐶', 'D' => '𝐷', 'E' => '𝐸', 'F' => '𝐹', 'G' => '𝐺', 'H' => '𝐻', 'I' => '𝐼', 'J' => '𝐽', 'K' => '𝐾', 'L' => '𝐿', 'M' => '𝑀', 'N' => '𝑁', 'O' => '𝑂', 'P' => '𝑃', 'Q' => '𝑄', 'R' => '𝑅', 'S' => '𝑆', 'T' => '𝑇', 'U' => '𝑈', 'V' => '𝑉', 'W' => '𝑊', 'X' => '𝑋', 'Y' => '𝑌', 'Z' => '𝑍' ); sub italics { my($s) = @_; my($rv, $c, $m); $rv = ''; foreach $c (split //, $s) { $m = $it_map{$c}; $rv .= ($m eq '') ? $c : $m; } return $rv; } # End of ital.ics sub underline { my($s) = @_; my($rv, $c); $rv = ''; foreach $c (split //, $s) { if ($c eq ' ') { $rv = $rv . "_"; # "　͟"; } else { $rv = $rv . $c . "͟"; } } return $rv; } # End of under.line # Beanish phonetic mappings are from https://beanishlang.wordpress.com/ my %bean_equiv = ( 'ᘝ' => 'ᘖ', 'ᐨ' => 'ᐧ' ); my %bean_pho = ( 'ᕬ' => 'Z', # ʒ zh as in fusion (voiced palato-alveolar sibilant) 'ᓭ' => 'AE', # a a as in hat (open front unrounded vowel) 'ᘛ' => 'EY', # e ay as in play (close-mid front unrounded vowel) 'ᖉ' => 'OW', # o o as in go (close-mid back rounded vowel) 'ᒣ' => 'IY', # i ee as in free (close front unrounded vowel) 'ᖊ' => 'UW', # u oo as in boot (close back rounded vowel) 'ᘊ' => 's', # s s as in sip (voiceless alveolar sibilant) 'ᑫ' => 'EH', # ɛ e as in bed (open-mid front unrounded vowel) 'ᘈ' => 'AO', # ɔ ough as in thought (open-mid back rounded vowel) 'ᘖ' => 't', # t t as in tick (voiceless alveolar stop) 'ᖚ' => 'r', # ə a as in tina (mid central vowel) 'ᔪ' => 'UW', # y ü as in German über (close front rounded vowel) %%% 'ᔭ' => 'UW', # ɯ oo as in California dialect goose (close back unrounded vowel) %%% 'ᑕ' => 'p', # p p as in pack (voiceless bilabial stop) 'ᖽ' => 'UX', # ɐ u as in nut (near-open central vowel) 'ᖆ' => 'm', # m m as in him (bilabial nasal) 'ᕋ' => 'b', # b b as in aback (voiced bilabial stop) 'ᕒ' => 'S', # ʃ sh as in sheep (voiceless palato-alveolar sibilant) 'ᓄ' => 'N', # ŋ ng as in sing (velar nasal) 'ᖗ' => 'r', # r r as in Spanish quiero (aveolar trill) %%% 'ᑲ' => 'f', # f f as in fill (voiceless labiodental fricative) 'ᐧ' => ' ', # _ⁿ nasal release %%% 'ᑦ' => ' ', # _ʰ aspirated %%% 'ᐣ' => ' ', # _: long vowel or geminated consonant %%% ); # Swap in the espeak transliteration %bean_pho = ( 'ᕬ' => 'Z', # ʒ 'ᓭ' => 'a', # a 'ᘛ' => 'eI', # e 'ᖉ' => 'oU', # o 'ᒣ' => 'i', # i 'ᖊ' => 'u:', # u 'ᘊ' => 's', # s 'ᑫ' => 'E', # ɛ 'ᘈ' => 'O:', # ɔ 'ᘖ' => 't', # t 'ᖚ' => '@', # ə 'ᔪ' => 'u:', # y %%% 'ᔭ' => 'u:', # ɯ %%% 'ᑕ' => 'p', # p 'ᖽ' => 'V', # ɐ 'ᖆ' => 'm', # m 'ᕋ' => 'b', # b 'ᕒ' => 'S', # ʃ 'ᓄ' => 'N', # ŋ 'ᖗ' => 'r', # r %%% 'ᑲ' => 'f', # f 'ᐧ' => ' ', # _ⁿ %%% 'ᑦ' => ' ', # _ʰ %%% 'ᐣ' => ' ', # _: %%% ); # Convert embedded runs of Beanish to espeak phonetics. # %%% Clearly something else has to be done on MacOS sub beanish_phon { my($in) = @_; my($pho, $c, $b, $beanrun); $b = $in; $pho = ''; foreach $c (split(//, $b)) { if ($bean_equiv{$c} ne '') { $c = $bean_equiv{$c}; } if ($bean_pho{$c} ne '') { $c = $bean_pho{$c}; if ($c eq ' ') { $c = ''; } if ($beanrun == 0) { # Beginning of a beanish run $c = '[[' . $c; $beanrun = 1; } } else { $c = lc($c); if ($beanrun) { # End of a beanish run $c = ']]' . $c; $beanrun = 0; } } $pho .= $c; } if ($beanrun) { $pho .= ']]'; } return $pho; } my ($g_utf); my ($g_output) = ""; # Given a line of meme output, convert RHTF markup to UTF8 and print out. sub printout { my($prefix, $meme) = @_; my($p1, $p2, $p3, $ctrla); my($l, $m2); $ctrla = "\001"; $l = "$prefix$meme"; if ($g_utf) { while ($l =~ m/^([^\$]*)\$([^\$]+)\$(.*)$/) { $p1 = $1; $p2 = $2; $p3 = $3; $l = $p1 . &smallcaps($p2) . $p3; } $l =~ s/__/$ctrla/g; while ($l =~ m/^([^_]*)_([^_]+)_(.*)$/) { $p1 = $1; $p2 = $2; $p3 = $3; $l = $p1 . &underline($p2) . $p3; } $l =~ s/$ctrla/_/g; $l =~ s/\*\*/$ctrla/g; while ($l =~ m/^([^*]*)\*([^*]+)\*(.*)$/) { $p1 = $1; $p2 = $2; $p3 = $3; $l = $p1 . &italics($p2) . $p3; } # UTF-8 based italics would go here $l =~ s/$ctrla/*/g; } if ($g_say) { $m2 = lc($meme); #while ($m2 =~ m/^([^\$]*)\$([^\$]+)\$(.*)$/) { # $m2 = "$1$2$3"; #} #$m2 =~ s/__/$ctrla/g; #while ($m2 =~ m/^([^_]*)_([^_]+)_(.*)$/) { # $m2 = "$1$2$3"; #} #$m2 =~ s/$ctrla//g; #$m2 =~ s/\*\*//g; #$m2 =~ s/$ctrla//g; $m2 =~ s/[\_\*\$]//g; $m2 =~ s/\&\&/\&/g; $m2 = &beanish_phon($m2); print "$meme\n$m2\n"; system($saybin, $m2); } else { if ($g_identify_source_template) { print "$g_root_template: "; $g_output .= "$g_root_template: "; } print $l; $g_output .= $l; } } # End of print.out # Determine if a line of text (output of a meme template) matches the # nominate, veto, require parameters given on the command line sub match2 { my($l) = @_; my($i, $rv); if ($g_icase) { $l = lc($l); } $rv = 0; for($i=0; $i<$num_nominate; $i++) { if ($l =~ m/$kw_nominate[$i]/) { # print " nom '$kw_nominate[$i]'"; $rv = 1; } } if ($num_nominate == 0) { $rv = 1; } for($i=0; $i<$num_require; $i++) { if (!($l =~ m/$kw_require[$i]/)) { # print " require '$kw_require[$i]'\n"; return 0; } } for($i=0; $i<$num_veto; $i++) { if ($l =~ m/$kw_veto[$i]/) { # print " veto '$kw_veto[$i]'\n"; return 0; } } #print ": rv $rv'\n"; return $rv; } # End of match.2 $| = 1; # Set defaults and parse arguments my ($arg, $i, $t); my($g_prefix) = ''; my($g_pbcopy) = 0; my($g_errors_fatal) = 0; while($arg = shift) { if ($arg =~ m/^-[-]?h(elp)?$/) { print $help; exit(0); } elsif ($arg eq '-c') { # Copy to clipboard $g_pbcopy = 1; } elsif ($arg eq '-d') { # Use user-specified memebase $g_memefile = shift; } elsif ($arg eq '-Dm') { # Debug: show every step of Markov substitution $g_logall = 1; $g_Dm = 1; } elsif ($arg eq '-ef') { $g_errors_fatal = 1; } elsif ($arg eq '-i') { # Ignore case (for pattern matching arguments) $g_icase = 1; } elsif ($arg eq '-ist') { # Identify source templates $g_identify_source_template = 1; } elsif ($arg eq '-lc') { # Limit characters: max length for memes $g_limit_characters = shift; } elsif ($arg eq '-mt') { # Maximum tries $g_maxtries = shift; if ($g_maxtries < 27) { $g_maxtries = 27; } } elsif ($arg eq '-n') { # Produce N memes $g_iterations = shift; } elsif ($arg eq '-o') { # Use original memebase $g_memefile = "automeme.txt"; } elsif ($arg eq '-prefix') { # Add a string to the beginning of each output meme $g_prefix = shift; print "prefix: $g_prefix\n" if ($g_verbose); } elsif ($arg eq '-s') { $g_say = 1; } elsif ($arg eq '-subject') { # Set options suitable for an OTT post subject line $g_prefix = '1190: "Time": '; $g_limit_characters = 60 - length($g_prefix); print "length limit: g_limit_characters; prefix: $g_prefix\n" if ($g_verbose); } elsif ($arg eq '-t') { $g_literal_template = shift; } elsif ($arg eq '-utf') { $g_utf = 1; } elsif ($arg eq '-v') { $g_verbose = 1; } elsif ($arg eq '-vv') { $g_verbose = 2; } elsif ($arg eq '-vvv') { $g_verbose = 3; $g_logall = 1; } elsif ($arg =~ m|^-(.+)$|) { $t = $1; if (&validate_regexp($t, 1)) { $t = lc($t) if ($g_icase); $kw_veto[$num_veto++] = $t; print "veto $t\n" if ($g_verbose); } } elsif ($arg =~ m|^\+(.+)$|) { $t = $1; if (&validate_regexp($t, 1)) { $t = lc($t) if ($g_icase); $kw_require[$num_require++] = $t; print "require $t\n" if ($g_verbose); } } elsif (&validate_regexp($arg, 1)) { $arg = lc($arg) if ($g_icase); $kw_nominate[$num_nominate++] = $arg; print "nominate $arg\n" if ($g_verbose); } } if ($num_veto + $num_require + $num_nominate == 0) { # No keywords given, so we'll match anything. $kw_nominate[$num_nominate++] = '.'; } if (($num_veto == 0) && ($num_require == 0)) { # No keywords have operators -- treat them as if they are all '+' @kw_require = @kw_nominate; $num_require = $num_nominate; print "No vetos or requires; treating all patterns as 'required'\n" if ($g_verbose); @kw_nominate = (); $num_nominate = 0; } if ($g_say) { my($p); if (0) { } elsif ($p = "/usr/bin/espeak", (-x $p)) { $saybin = $p; } elsif ($p = "/usr/bin/say", (-x $p)) { $saybin = $p; } else { "espeak or say not found; ignoring -s option.\n"; $g_say = 0; } } # Read in the memes database &load_data(); # Put brackets around literal template, if that makes sense if ($class_pop{$g_literal_template} > 0) { $g_literal_template = "[$g_literal_template]"; } elsif ($named_template{$g_literal_template} ne '') { $g_literal_template = "[$g_literal_template]"; } if ($g_iterations <= 0) { print ("There are " . $class_pop{'t:'} . " meme templates.\n"); &examples(); } # Produce N memes my ($m, $gg); for($i=0; $i<$g_iterations; $i++) { # Get memes until we have one that's short enough $gg = 1; while($gg) { if ($g_literal_template ne '') { $g_root_template = $g_literal_template; $g_root_lnum = -1; # &mladd("Command line -t '$g_root_template,'", -4); } else { ($g_root_template, $g_root_lnum) = &rep1('t:'); print "\nSource line $g_root_lnum, anonymous template: $g_root_template\n" if ($g_verbose); } $g_vv_sptr = 0; $m = &mfy2($g_root_template); if ($g_error_count && $g_errors_fatal) { # End both loops $gg = 0; $i = $g_iterations; } else { $gg++; } if ($gg > $g_maxtries) { print SERR "I have tried $g_maxtries times without finding"; print SERR (($i > 0) ? " enough memes" : " any meme"); print SERR " matching the given constraints.\n"; exit(-1); } elsif (!(&match2($m))) { # Doesn't match the filter patterns; keep looking. print "Does not match filter pattern(s)\n" if ($g_verbose); } elsif (length($m) > $g_limit_characters) { # Too long, keep looking. print "Exceeds length $g_limit_characters\n" if ($g_verbose); } else { # All conditions met! $gg = 0; } } if ($g_Dm) { &dump_mfylog(); } &printout($g_prefix, $m); print "\n"; } if ($g_pbcopy) { open (my $PBC, "| pbcopy"); print $PBC $g_output; close $PBC; }